diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index bbfb8a0dbe26a4..a48b4a538d8fe6 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -196,6 +196,11 @@ namespace llvm { /// This pass reads flow sensitive profile. extern char &MIRProfileLoaderPassID; + // This pass gives undef values a Pseudo Instruction definition for + // Instructions to ensure early-clobber is followed when using the greedy + // register allocator. + extern char &InitUndefID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 138c65785430f0..e7787aafb98e2d 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2223,6 +2223,15 @@ class TargetInstrInfo : public MCInstrInfo { llvm_unreachable("unknown number of operands necessary"); } + /// Gets the opcode for the Pseudo Instruction used to initialize + /// the undef value. If no Instruction is available, this will + /// fail compilation. + virtual unsigned getUndefInitOpcode(unsigned RegClassID) const { + (void)RegClassID; + + llvm_unreachable("Unexpected register class."); + } + private: mutable std::unique_ptr Formatter; unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 5098fc68df3b20..e7c9ecd2e1851a 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -1172,6 +1172,28 @@ class TargetRegisterInfo : public MCRegisterInfo { virtual bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const { return false; } + + /// Returns the Largest Super Class that is being initialized. 
There + /// should be a Pseudo Instruction implemented for the super class + /// that is being returned to ensure that Init Undef can apply the + /// initialization correctly. + virtual const TargetRegisterClass * + getLargestSuperClass(const TargetRegisterClass *RC) const { + llvm_unreachable("Unexpected target register class."); + } + + /// Returns if the architecture being targeted has the required Pseudo + /// Instructions for initializing the register. By default this returns false, + /// but where it is overridden for an architecture, the behaviour will be + /// different. This can either be a check to ensure the Register Class is + /// present, or to return true as an indication the architecture supports the + /// pass. If using the method that does not check for the Register Class, it + /// is imperative to ensure all required Pseudo Instructions are implemented, + /// otherwise compilation may fail with an `Unexpected register class` error. + virtual bool + doesRegClassHavePseudoInitUndef(const TargetRegisterClass *RC) const { + return false; + } }; //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index a064dec7d8ab38..7f8ed5c5019890 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -327,6 +327,12 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// Get the list of MacroFusion predicates. virtual std::vector getMacroFusions() const { return {}; }; + + /// supportsInitUndef is used to determine if an architecture supports + /// the Init Undef Pass. By default, it is assumed that it will not support + /// the pass, with architecture specific overrides providing the information + /// where they are implemented. 
+ virtual bool supportsInitUndef() const { return false; } }; } // end namespace llvm diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 3db639a6872407..ba6af1ea44163b 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -301,6 +301,7 @@ void initializeTLSVariableHoistLegacyPassPass(PassRegistry &); void initializeTwoAddressInstructionPassPass(PassRegistry&); void initializeTypeBasedAAWrapperPassPass(PassRegistry&); void initializeTypePromotionLegacyPass(PassRegistry&); +void initializeInitUndefPass(PassRegistry &); void initializeUniformityInfoWrapperPassPass(PassRegistry &); void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &); void initializeUnpackMachineBundlesPass(PassRegistry&); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 80bbfb75185a9c..91bcdc491aa723 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -1032,6 +1032,8 @@ void CodeGenPassBuilder::addOptimizedRegAlloc( AddMachinePass &addPass) const { addPass(DetectDeadLanesPass()); + addPass(InitUndefPass()); + addPass(ProcessImplicitDefsPass()); // Edge splitting is smarter with machine loop info. 
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index d8972080beeb0d..016602730e0e97 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -173,6 +173,7 @@ DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass) DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass) DUMMY_MACHINE_FUNCTION_PASS("gc-empty-basic-blocks", GCEmptyBasicBlocksPass) DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass) +DUMMY_MACHINE_FUNCTION_PASS("init-undef-pass", InitUndefPass) DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass) DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass) DUMMY_MACHINE_FUNCTION_PASS("kcfi", MachineKCFIPass) diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index d49bcf8a0c8ee2..e02c1d6417e077 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -75,6 +75,7 @@ add_llvm_component_library(LLVMCodeGen IfConversion.cpp ImplicitNullChecks.cpp IndirectBrExpandPass.cpp + InitUndef.cpp InlineSpiller.cpp InterferenceCache.cpp InterleavedAccessPass.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 418066452c1724..c889aab60df224 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -54,6 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeIfConverterPass(Registry); initializeImplicitNullChecksPass(Registry); initializeIndirectBrExpandLegacyPassPass(Registry); + initializeInitUndefPass(Registry); initializeInterleavedLoadCombinePass(Registry); initializeInterleavedAccessPass(Registry); initializeJMCInstrumenterPass(Registry); diff --git a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp similarity index 61% rename from llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp rename to 
llvm/lib/CodeGen/InitUndef.cpp index 735fc1350c0091..96ac385b6abf82 100644 --- a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -1,4 +1,4 @@ -//===- RISCVRVVInitUndef.cpp - Initialize undef vector value to pseudo ----===// +//===- InitUndef.cpp - Initialize undef value to pseudo ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,23 +6,22 @@ // //===----------------------------------------------------------------------===// // -// This file implements a function pass that initializes undef vector value to -// temporary pseudo instruction and remove it in expandpseudo pass to prevent -// register allocation resulting in a constraint violated result for vector -// instruction. It also rewrites the NoReg tied operand back to an -// IMPLICIT_DEF. +// This file implements a function pass that initializes undef value to +// temporary pseudo instruction to prevent register allocation resulting in a +// constraint violated result for the particular instruction. It also rewrites +// the NoReg tied operand back to an IMPLICIT_DEF. 
// -// RISC-V vector instruction has register overlapping constraint for certain -// instructions, and will cause illegal instruction trap if violated, we use -// early clobber to model this constraint, but it can't prevent register -// allocator allocated same or overlapped if the input register is undef value, -// so convert IMPLICIT_DEF to temporary pseudo instruction and remove it later -// could prevent that happen, it's not best way to resolve this, and it might +// Certain instructions have register overlapping constraints, and +// will cause illegal instruction trap if violated, we use early clobber to +// model this constraint, but it can't prevent register allocator allocating +// same or overlapped if the input register is undef value, so convert +// IMPLICIT_DEF to temporary pseudo instruction and remove it later could +// prevent that happen, it's not best way to resolve this, and it might // change the order of program or increase the register pressure, so ideally we // should model the constraint right, but before we model the constraint right, // it's the only way to prevent that happen. // -// When we enable the subregister liveness option, it will also trigger same +// When we enable the subregister liveness option, it will also trigger the same // issue due to the partial of register is undef. If we pseudoinit the whole // register, then it will generate redundant COPY instruction. Currently, it // will generate INSERT_SUBREG to make sure the whole register is occupied @@ -31,7 +30,7 @@ // // See also: https://github.com/llvm/llvm-project/issues/50157 // -// Additionally, this pass rewrites tied operands of vector instructions +// Additionally, this pass rewrites tied operands of instructions // from NoReg to IMPLICIT_DEF. (Not that this is a non-overlapping set of // operands to the above.) 
We use NoReg to side step a MachineCSE // optimization quality problem but need to convert back before @@ -39,23 +38,31 @@ // //===----------------------------------------------------------------------===// -#include "RISCV.h" -#include "RISCVSubtarget.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/DetectDeadLanes.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCRegister.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" + using namespace llvm; -#define DEBUG_TYPE "riscv-init-undef" -#define RISCV_INIT_UNDEF_NAME "RISC-V init undef pass" +#define DEBUG_TYPE "init-undef" +#define INIT_UNDEF_NAME "Init Undef Pass" namespace { -class RISCVInitUndef : public MachineFunctionPass { +class InitUndef : public MachineFunctionPass { const TargetInstrInfo *TII; MachineRegisterInfo *MRI; - const RISCVSubtarget *ST; + const TargetSubtargetInfo *ST; const TargetRegisterInfo *TRI; // Newly added vregs, assumed to be fully rewritten @@ -65,7 +72,7 @@ class RISCVInitUndef : public MachineFunctionPass { public: static char ID; - RISCVInitUndef() : MachineFunctionPass(ID) {} + InitUndef() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -73,14 +80,11 @@ class RISCVInitUndef : public MachineFunctionPass { MachineFunctionPass::getAnalysisUsage(AU); } - StringRef getPassName() const override { return RISCV_INIT_UNDEF_NAME; } + StringRef getPassName() const override { return INIT_UNDEF_NAME; } private: bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, const DeadLaneDetector &DLD); - bool isVectorRegClass(const Register R); - const 
TargetRegisterClass * - getVRLargestSuperClass(const TargetRegisterClass *RC) const; bool handleSubReg(MachineFunction &MF, MachineInstr &MI, const DeadLaneDetector &DLD); bool fixupIllOperand(MachineInstr *MI, MachineOperand &MO); @@ -89,45 +93,9 @@ class RISCVInitUndef : public MachineFunctionPass { } // end anonymous namespace -char RISCVInitUndef::ID = 0; -INITIALIZE_PASS(RISCVInitUndef, DEBUG_TYPE, RISCV_INIT_UNDEF_NAME, false, false) -char &llvm::RISCVInitUndefID = RISCVInitUndef::ID; - -const TargetRegisterClass * -RISCVInitUndef::getVRLargestSuperClass(const TargetRegisterClass *RC) const { - if (RISCV::VRM8RegClass.hasSubClassEq(RC)) - return &RISCV::VRM8RegClass; - if (RISCV::VRM4RegClass.hasSubClassEq(RC)) - return &RISCV::VRM4RegClass; - if (RISCV::VRM2RegClass.hasSubClassEq(RC)) - return &RISCV::VRM2RegClass; - if (RISCV::VRRegClass.hasSubClassEq(RC)) - return &RISCV::VRRegClass; - return RC; -} - -bool RISCVInitUndef::isVectorRegClass(const Register R) { - const TargetRegisterClass *RC = MRI->getRegClass(R); - return RISCV::VRRegClass.hasSubClassEq(RC) || - RISCV::VRM2RegClass.hasSubClassEq(RC) || - RISCV::VRM4RegClass.hasSubClassEq(RC) || - RISCV::VRM8RegClass.hasSubClassEq(RC); -} - -static unsigned getUndefInitOpcode(unsigned RegClassID) { - switch (RegClassID) { - case RISCV::VRRegClassID: - return RISCV::PseudoRVVInitUndefM1; - case RISCV::VRM2RegClassID: - return RISCV::PseudoRVVInitUndefM2; - case RISCV::VRM4RegClassID: - return RISCV::PseudoRVVInitUndefM4; - case RISCV::VRM8RegClassID: - return RISCV::PseudoRVVInitUndefM8; - default: - llvm_unreachable("Unexpected register class."); - } -} +char InitUndef::ID = 0; +INITIALIZE_PASS(InitUndef, DEBUG_TYPE, INIT_UNDEF_NAME, false, false) +char &llvm::InitUndefID = InitUndef::ID; static bool isEarlyClobberMI(MachineInstr &MI) { return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) { @@ -143,7 +111,7 @@ static bool findImplictDefMIFromReg(Register Reg, MachineRegisterInfo *MRI) { return 
false; } -bool RISCVInitUndef::handleReg(MachineInstr *MI) { +bool InitUndef::handleReg(MachineInstr *MI) { bool Changed = false; for (auto &UseMO : MI->uses()) { if (!UseMO.isReg()) @@ -152,7 +120,7 @@ bool RISCVInitUndef::handleReg(MachineInstr *MI) { continue; if (!UseMO.getReg().isVirtual()) continue; - if (!isVectorRegClass(UseMO.getReg())) + if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg()))) continue; if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI)) @@ -161,8 +129,8 @@ bool RISCVInitUndef::handleReg(MachineInstr *MI) { return Changed; } -bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, - const DeadLaneDetector &DLD) { +bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, + const DeadLaneDetector &DLD) { bool Changed = false; for (MachineOperand &UseMO : MI.uses()) { @@ -172,6 +140,8 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, continue; if (UseMO.isTied()) continue; + if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg()))) + continue; Register Reg = UseMO.getReg(); if (NewRegs.count(Reg)) @@ -183,7 +153,7 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, continue; const TargetRegisterClass *TargetRegClass = - getVRLargestSuperClass(MRI->getRegClass(Reg)); + TRI->getLargestSuperClass(MRI->getRegClass(Reg)); LaneBitmask NeedDef = Info.UsedLanes & ~Info.DefinedLanes; @@ -202,11 +172,12 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, Register LatestReg = Reg; for (auto ind : SubRegIndexNeedInsert) { Changed = true; - const TargetRegisterClass *SubRegClass = - getVRLargestSuperClass(TRI->getSubRegisterClass(TargetRegClass, ind)); + const TargetRegisterClass *SubRegClass = TRI->getLargestSuperClass( + TRI->getSubRegisterClass(TargetRegClass, ind)); Register TmpInitSubReg = MRI->createVirtualRegister(SubRegClass); + LLVM_DEBUG(dbgs() << "Register Class ID" << 
SubRegClass->getID() << "\n"); BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), - TII->get(getUndefInitOpcode(SubRegClass->getID())), + TII->get(TII->getUndefInitOpcode(SubRegClass->getID())), TmpInitSubReg); Register NewReg = MRI->createVirtualRegister(TargetRegClass); BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), @@ -223,15 +194,16 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, return Changed; } -bool RISCVInitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) { +bool InitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) { LLVM_DEBUG( - dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register " + dbgs() << "Emitting PseudoInitUndef Instruction for implicit register " << MO.getReg() << '\n'); const TargetRegisterClass *TargetRegClass = - getVRLargestSuperClass(MRI->getRegClass(MO.getReg())); - unsigned Opcode = getUndefInitOpcode(TargetRegClass->getID()); + TRI->getLargestSuperClass(MRI->getRegClass(MO.getReg())); + LLVM_DEBUG(dbgs() << "Register Class ID" << TargetRegClass->getID() << "\n"); + unsigned Opcode = TII->getUndefInitOpcode(TargetRegClass->getID()); Register NewReg = MRI->createVirtualRegister(TargetRegClass); BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(Opcode), NewReg); MO.setReg(NewReg); @@ -240,9 +212,8 @@ bool RISCVInitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) { return true; } -bool RISCVInitUndef::processBasicBlock(MachineFunction &MF, - MachineBasicBlock &MBB, - const DeadLaneDetector &DLD) { +bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, + const DeadLaneDetector &DLD) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { MachineInstr &MI = *I; @@ -252,15 +223,15 @@ bool RISCVInitUndef::processBasicBlock(MachineFunction &MF, unsigned UseOpIdx; if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { MachineOperand &UseMO = MI.getOperand(UseOpIdx); - if 
(UseMO.getReg() == RISCV::NoRegister) { + if (UseMO.getReg() == MCRegister::NoRegister) { const TargetRegisterClass *RC = - TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF); + TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF); Register NewDest = MRI->createVirtualRegister(RC); // We don't have a way to update dead lanes, so keep track of the // new register so that we avoid querying it later. NewRegs.insert(NewDest); - BuildMI(MBB, I, I->getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), NewDest); + BuildMI(MBB, I, I->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), + NewDest); UseMO.setReg(NewDest); Changed = true; } @@ -275,9 +246,16 @@ bool RISCVInitUndef::processBasicBlock(MachineFunction &MF, return Changed; } -bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) { - ST = &MF.getSubtarget(); - if (!ST->hasVInstructions()) +bool InitUndef::runOnMachineFunction(MachineFunction &MF) { + ST = &MF.getSubtarget(); + + // supportsInitUndef is implemented to reflect if an architecture has support + // for the InitUndef pass. Support comes from having the relevant Pseudo + // instructions that can be used to initialize the register. The function + // returns false by default so requires an implementation per architecture. + // Support can be added by overriding the function in a way that best fits + // the architecture. 
+ if (!ST->supportsInitUndef()) return false; MRI = &MF.getRegInfo(); @@ -297,5 +275,3 @@ bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) { return Changed; } - -FunctionPass *llvm::createRISCVInitUndefPass() { return new RISCVInitUndef(); } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 2ed39a5696e205..cf068ece8d4cab 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1427,6 +1427,8 @@ void TargetPassConfig::addFastRegAlloc() { void TargetPassConfig::addOptimizedRegAlloc() { addPass(&DetectDeadLanesID); + addPass(&InitUndefID); + addPass(&ProcessImplicitDefsID); // LiveVariables currently requires pure SSA form. diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 15cda9b9432d5f..642739a29d6b06 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -2409,6 +2409,12 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { case ARM::SEH_EpilogEnd: ATS.emitARMWinCFIEpilogEnd(); return; + + case ARM::PseudoARMInitUndefMQPR: + case ARM::PseudoARMInitUndefSPR: + case ARM::PseudoARMInitUndefDPR_VFP2: + case ARM::PseudoARMInitUndefGPR: + return; } MCInst TmpInst; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index c3b475e0306eee..30f0730774b78c 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -13,16 +13,21 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H #define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H +#include "ARMBaseRegisterInfo.h" #include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" +#include 
"llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsARM.h" +#include "llvm/Support/ErrorHandling.h" #include #include @@ -536,6 +541,19 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { std::optional isAddImmediate(const MachineInstr &MI, Register Reg) const override; + + unsigned getUndefInitOpcode(unsigned RegClassID) const override { + if (RegClassID == ARM::MQPRRegClass.getID()) + return ARM::PseudoARMInitUndefMQPR; + if (RegClassID == ARM::SPRRegClass.getID()) + return ARM::PseudoARMInitUndefSPR; + if (RegClassID == ARM::DPR_VFP2RegClass.getID()) + return ARM::PseudoARMInitUndefDPR_VFP2; + if (RegClassID == ARM::GPRRegClass.getID()) + return ARM::PseudoARMInitUndefGPR; + + llvm_unreachable("Unexpected register class."); + } }; /// Get the operands corresponding to the given \p Pred value. By default, the diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 926d702b4092a5..53803cff8b90ac 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -240,6 +240,33 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo { unsigned SrcSubReg) const override; int getSEHRegNum(unsigned i) const { return getEncodingValue(i); } + + const TargetRegisterClass * + getLargestSuperClass(const TargetRegisterClass *RC) const override { + if (ARM::MQPRRegClass.hasSubClassEq(RC)) + return &ARM::MQPRRegClass; + if (ARM::SPRRegClass.hasSubClassEq(RC)) + return &ARM::SPRRegClass; + if (ARM::DPR_VFP2RegClass.hasSubClassEq(RC)) + return &ARM::DPR_VFP2RegClass; + if (ARM::GPRRegClass.hasSubClassEq(RC)) + return &ARM::GPRRegClass; + return RC; + } + + bool doesRegClassHavePseudoInitUndef( + const TargetRegisterClass *RC) const override { + (void)RC; + // For the ARM Architecture we want to always return true because all + // required PseudoInitUndef types have 
been added. If compilation fails due + // to `Unexpected register class`, this is likely to be because the specific + // register being used is not supported by Init Undef and needs the Pseudo + // Instruction adding to ARMInstrInfo.td. If this is implemented as a + // conditional check, this could create a false positive where Init Undef is + // not running, skipping the instruction and moving to the next. This could + // lead to illegal instructions being generated by the register allocator. + return true; + } }; } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 074dea36b64145..08b519e4d5cbf5 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -6534,3 +6534,15 @@ let isPseudo = 1 in { let isTerminator = 1 in def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; } + + +//===----------------------------------------------------------------------===// +// Pseudo Instructions for use when early-clobber is defined and Greedy Register +// Allocation is used. This ensures the constraint is used properly. 
+//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in { + def PseudoARMInitUndefMQPR : PseudoInst<(outs MQPR:$vd), (ins), NoItinerary, []>; + def PseudoARMInitUndefSPR : PseudoInst<(outs SPR:$sd), (ins), NoItinerary, []>; + def PseudoARMInitUndefDPR_VFP2 : PseudoInst<(outs DPR_VFP2:$dd), (ins), NoItinerary, []>; + def PseudoARMInitUndefGPR : PseudoInst<(outs GPR:$rd), (ins), NoItinerary, []>; +} diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 91f3978b041a3a..044b1c4c54e0c8 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -278,6 +278,13 @@ class ARMSubtarget : public ARMGenSubtargetInfo { return &InstrInfo->getRegisterInfo(); } + /// The correct instructions have been implemented to initialize undef + /// registers, therefore the ARM Architecture is supported by the Init Undef + /// Pass. This will return true as the pass needs to be supported for all + /// types of instructions. The pass will then perform more checks to ensure it + /// should be applying the Pseudo Instructions. 
+ bool supportsInitUndef() const override { return true; } + const CallLowering *getCallLowering() const override; InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index ac88cd49db4e4b..8715403f3839a6 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -51,7 +51,6 @@ add_llvm_target(RISCVCodeGen RISCVMoveMerger.cpp RISCVPushPopOptimizer.cpp RISCVRegisterInfo.cpp - RISCVRVVInitUndef.cpp RISCVSubtarget.cpp RISCVTargetMachine.cpp RISCVTargetObjectFile.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 9eb18099894b21..7af543f018ccbd 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -72,10 +72,6 @@ void initializeRISCVInsertWriteVXRMPass(PassRegistry &); FunctionPass *createRISCVRedundantCopyEliminationPass(); void initializeRISCVRedundantCopyEliminationPass(PassRegistry &); -FunctionPass *createRISCVInitUndefPass(); -void initializeRISCVInitUndefPass(PassRegistry &); -extern char &RISCVInitUndefID; - FunctionPass *createRISCVMoveMergePass(); void initializeRISCVMoveMergePass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 0f7d3e4e433908..2d567342599636 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H #define LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H +#include "RISCV.h" #include "RISCVRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DiagnosticInfo.h" @@ -20,6 +21,7 @@ #define GET_INSTRINFO_HEADER #define GET_INSTRINFO_OPERAND_ENUM #include "RISCVGenInstrInfo.inc" +#include "RISCVGenRegisterInfo.inc" namespace llvm { @@ -262,6 +264,21 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { ArrayRef> 
getSerializableMachineMemOperandTargetFlags() const override; + unsigned getUndefInitOpcode(unsigned RegClassID) const override { + switch (RegClassID) { + case RISCV::VRRegClassID: + return RISCV::PseudoRVVInitUndefM1; + case RISCV::VRM2RegClassID: + return RISCV::PseudoRVVInitUndefM2; + case RISCV::VRM4RegClassID: + return RISCV::PseudoRVVInitUndefM4; + case RISCV::VRM8RegClassID: + return RISCV::PseudoRVVInitUndefM8; + default: + llvm_unreachable("Unexpected register class."); + } + } + protected: const RISCVSubtarget &STI; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 431ea23b3e2d04..e46fe8ecb900fc 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -96,6 +96,27 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { SmallVectorImpl &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + + const TargetRegisterClass * + getLargestSuperClass(const TargetRegisterClass *RC) const override { + if (RISCV::VRM8RegClass.hasSubClassEq(RC)) + return &RISCV::VRM8RegClass; + if (RISCV::VRM4RegClass.hasSubClassEq(RC)) + return &RISCV::VRM4RegClass; + if (RISCV::VRM2RegClass.hasSubClassEq(RC)) + return &RISCV::VRM2RegClass; + if (RISCV::VRRegClass.hasSubClassEq(RC)) + return &RISCV::VRRegClass; + return RC; + } + + bool doesRegClassHavePseudoInitUndef( + const TargetRegisterClass *RC) const override { + return RISCV::VRRegClass.hasSubClassEq(RC) || + RISCV::VRM2RegClass.hasSubClassEq(RC) || + RISCV::VRM4RegClass.hasSubClassEq(RC) || + RISCV::VRM8RegClass.hasSubClassEq(RC); + } }; } diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 8c55efa69a6a5f..b628b7b0b8c67d 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -277,6 +277,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { }; unsigned 
getMinimumJumpTableEntries() const; + + bool supportsInitUndef() const override { return hasVInstructions(); } }; } // End llvm namespace diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index adef40e19cba4a..3ff5f837d55214 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -124,7 +124,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVInsertReadWriteCSRPass(*PR); initializeRISCVInsertWriteVXRMPass(*PR); initializeRISCVDAGToDAGISelPass(*PR); - initializeRISCVInitUndefPass(*PR); initializeRISCVMoveMergePass(*PR); initializeRISCVPushPopOptPass(*PR); } @@ -383,7 +382,6 @@ class RISCVPassConfig : public TargetPassConfig { bool addRegAssignAndRewriteOptimized() override; void addPreRegAlloc() override; void addPostRegAlloc() override; - void addOptimizedRegAlloc() override; void addFastRegAlloc() override; }; } // namespace @@ -564,14 +562,8 @@ void RISCVPassConfig::addPreRegAlloc() { addPass(createRISCVInsertWriteVXRMPass()); } -void RISCVPassConfig::addOptimizedRegAlloc() { - insertPass(&DetectDeadLanesID, &RISCVInitUndefID); - - TargetPassConfig::addOptimizedRegAlloc(); -} - void RISCVPassConfig::addFastRegAlloc() { - addPass(createRISCVInitUndefPass()); + addPass(&InitUndefID); TargetPassConfig::addFastRegAlloc(); } diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 638f26298ee26a..ae0dbed09979b4 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -155,6 +155,7 @@ ; CHECK-NEXT: AArch64 MI Peephole Optimization pass ; CHECK-NEXT: AArch64 Dead register definitions ; CHECK-NEXT: Detect Dead Lanes +; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions ; CHECK-NEXT: Remove unreachable machine basic blocks ; CHECK-NEXT: Live Variable Analysis diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll 
b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 48f00a82e3e1c6..c67328a025b858 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -321,6 +321,7 @@ ; GCN-O1-NEXT: Register Usage Information Propagation ; GCN-O1-NEXT: Detect Dead Lanes ; GCN-O1-NEXT: Remove dead machine instructions +; GCN-O1-NEXT: Init Undef Pass ; GCN-O1-NEXT: Process Implicit Definitions ; GCN-O1-NEXT: Remove unreachable machine basic blocks ; GCN-O1-NEXT: Live Variable Analysis @@ -618,6 +619,7 @@ ; GCN-O1-OPTS-NEXT: Register Usage Information Propagation ; GCN-O1-OPTS-NEXT: Detect Dead Lanes ; GCN-O1-OPTS-NEXT: Remove dead machine instructions +; GCN-O1-OPTS-NEXT: Init Undef Pass ; GCN-O1-OPTS-NEXT: Process Implicit Definitions ; GCN-O1-OPTS-NEXT: Remove unreachable machine basic blocks ; GCN-O1-OPTS-NEXT: Live Variable Analysis @@ -920,6 +922,7 @@ ; GCN-O2-NEXT: Register Usage Information Propagation ; GCN-O2-NEXT: Detect Dead Lanes ; GCN-O2-NEXT: Remove dead machine instructions +; GCN-O2-NEXT: Init Undef Pass ; GCN-O2-NEXT: Process Implicit Definitions ; GCN-O2-NEXT: Remove unreachable machine basic blocks ; GCN-O2-NEXT: Live Variable Analysis @@ -1235,6 +1238,7 @@ ; GCN-O3-NEXT: Register Usage Information Propagation ; GCN-O3-NEXT: Detect Dead Lanes ; GCN-O3-NEXT: Remove dead machine instructions +; GCN-O3-NEXT: Init Undef Pass ; GCN-O3-NEXT: Process Implicit Definitions ; GCN-O3-NEXT: Remove unreachable machine basic blocks ; GCN-O3-NEXT: Live Variable Analysis diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 5e565970fc3a86..5914e98549fcc4 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -113,6 +113,7 @@ ; CHECK-NEXT: ARM pre- register allocation load / store optimization pass ; CHECK-NEXT: ARM A15 S->D optimizer ; CHECK-NEXT: Detect Dead Lanes +; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions ; CHECK-NEXT: Remove 
unreachable machine basic blocks ; CHECK-NEXT: Live Variable Analysis diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 3134d940545e80..a31eb8d11a35a6 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -105,6 +105,7 @@ ; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: LoongArch Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: Detect Dead Lanes +; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions ; CHECK-NEXT: Remove unreachable machine basic blocks ; CHECK-NEXT: Live Variable Analysis diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index 6ce4416211cc4d..f94f91b38fecc9 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -149,6 +149,7 @@ ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Modulo Software Pipelining ; CHECK-NEXT: Detect Dead Lanes +; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions ; CHECK-NEXT: Remove unreachable machine basic blocks ; CHECK-NEXT: Live Variable Analysis diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index e90fa24761bc16..faf37545e1a117 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -43,7 +43,7 @@ ; CHECK-NEXT: RISC-V Insert VSETVLI pass ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass -; CHECK-NEXT: RISC-V init undef pass +; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass ; CHECK-NEXT: Fast Register Allocator diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 364c1e430b9156..90472f246918f3 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ 
b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -120,7 +120,7 @@ ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: Detect Dead Lanes -; CHECK-NEXT: RISC-V init undef pass +; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions ; CHECK-NEXT: Remove unreachable machine basic blocks ; CHECK-NEXT: Live Variable Analysis diff --git a/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir b/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir index 4102aa8aa4d723..e090b313d4f7b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir +++ b/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -run-pass=riscv-init-undef -o - %s | FileCheck %s --check-prefix=MIR +# RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -run-pass=init-undef -o - %s | FileCheck %s --check-prefix=MIR ... --- name: vrgather_all_undef diff --git a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir index bf12a4df88d38c..9cafb323dc65c8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir +++ b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -mtriple=riscv64 -mattr=+v -riscv-enable-subreg-liveness -run-pass=riscv-init-undef -o - | FileCheck %s +# RUN: llc %s -mtriple=riscv64 -mattr=+v -riscv-enable-subreg-liveness -run-pass=init-undef -o - | FileCheck %s ... 
--- diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir index 58b2687824aa14..dcf61c048ff0e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=riscv32 -mattr=+v -riscv-enable-subreg-liveness -run-pass riscv-init-undef -run-pass machineverifier %s -o - | FileCheck %s +# RUN: llc -mtriple=riscv32 -mattr=+v -riscv-enable-subreg-liveness -run-pass init-undef -run-pass machineverifier %s -o - | FileCheck %s --- | source_filename = "" diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll index 9bb24fc61ccef3..02234c63725360 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll @@ -699,6 +699,17 @@ entry: ret <4 x i32> %0 } +define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32_undef() { +; CHECK-LABEL: test_vhcaddq_rot270_s32_undef: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vhcadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270 +; CHECK-NOT: vhcadd.s32 q[[REG:[0-9]+]], q{{[0-9]+}}, q[[REG]], #270 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> undef, <4 x i32> undef) + ret <4 x i32> %0 +} + define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { ; CHECK-LABEL: test_vhcaddq_rot90_x_s8: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll index c95fe2296e0998..203ce1f8811895 100644 --- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll +++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll @@ -373,13 +373,13 @@ define arm_aapcs_vfpcc void @mul_i32(ptr %A, ptr %B, 
i64 %C, ptr %D) { ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: ldr.w lr, [sp, #20] -; CHECK-NEXT: vmov.f32 s10, s1 ; CHECK-NEXT: vmov.f32 s14, s5 +; CHECK-NEXT: vmov.f32 s10, s1 ; CHECK-NEXT: vmov r5, s4 ; CHECK-NEXT: vmov.f32 s4, s6 ; CHECK-NEXT: vmov.f32 s6, s7 -; CHECK-NEXT: vmov r0, s10 ; CHECK-NEXT: vmov r1, s14 +; CHECK-NEXT: vmov r0, s10 ; CHECK-NEXT: smull r12, r3, r1, r0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vmov.f32 s0, s2 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index bd672d1ba4f660..6d581afe9fb31e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -222,88 +222,88 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: .LBB1_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q4, [r5], #16 -; CHECK-NEXT: vldrw.u32 q3, [r0], #16 +; CHECK-NEXT: vldrw.u32 q3, [r5], #16 +; CHECK-NEXT: vldrw.u32 q2, [r0], #16 ; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov.w r2, #-1 -; CHECK-NEXT: vmov.f32 s8, s14 +; CHECK-NEXT: vmov.f32 s16, s10 ; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: vmov.f32 s20, s18 +; CHECK-NEXT: vmov.f32 s20, s14 +; CHECK-NEXT: vmov.f32 s18, s11 +; CHECK-NEXT: vmov.f32 s22, s15 ; CHECK-NEXT: mov.w r8, #0 -; CHECK-NEXT: vmov.f32 s10, s15 -; CHECK-NEXT: vmov.f32 s22, s19 -; CHECK-NEXT: vmullb.s32 q6, q5, q2 -; CHECK-NEXT: vmov.f32 s18, s17 +; CHECK-NEXT: vmullb.s32 q6, q5, q4 +; CHECK-NEXT: vmov.f32 s14, s13 ; CHECK-NEXT: vmov r4, r7, d12 ; CHECK-NEXT: asrl r4, r7, #31 -; CHECK-NEXT: vmov.f32 s14, s13 +; CHECK-NEXT: vmov.f32 s10, s9 ; CHECK-NEXT: rsbs.w r5, r4, #-2147483648 ; CHECK-NEXT: sbcs.w r5, r2, r7 ; CHECK-NEXT: csetm r5, lt ; CHECK-NEXT: bfi r8, r5, #0, #8 ; CHECK-NEXT: vmov r10, r5, d13 ; CHECK-NEXT: asrl r10, r5, #31 -; CHECK-NEXT: 
vmov r6, s18 +; CHECK-NEXT: vmov r6, s14 ; CHECK-NEXT: rsbs.w r3, r10, #-2147483648 -; CHECK-NEXT: vmov q2[2], q2[0], r4, r10 +; CHECK-NEXT: vmov q4[2], q4[0], r4, r10 ; CHECK-NEXT: sbcs.w r3, r2, r5 -; CHECK-NEXT: vmov q2[3], q2[1], r7, r5 +; CHECK-NEXT: vmov q4[3], q4[1], r7, r5 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r8, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r8 ; CHECK-NEXT: mvn r8, #-2147483648 -; CHECK-NEXT: vpsel q2, q2, q0 -; CHECK-NEXT: vmov r3, r4, d4 +; CHECK-NEXT: vpsel q4, q4, q0 +; CHECK-NEXT: vmov r3, r4, d8 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, r5, d5 +; CHECK-NEXT: vmov r3, r5, d9 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: mov.w r5, #0 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 -; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r3, s8 ; CHECK-NEXT: vmsr p0, r4 -; CHECK-NEXT: vmov r4, s16 -; CHECK-NEXT: vpsel q2, q2, q1 +; CHECK-NEXT: vmov r4, s12 +; CHECK-NEXT: vpsel q4, q4, q1 ; CHECK-NEXT: smull r4, r7, r4, r3 ; CHECK-NEXT: asrl r4, r7, #31 ; CHECK-NEXT: rsbs.w r3, r4, #-2147483648 ; CHECK-NEXT: sbcs.w r3, r2, r7 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: vmov r3, s14 +; CHECK-NEXT: vmov r3, s10 ; CHECK-NEXT: smull r6, r3, r6, r3 ; CHECK-NEXT: asrl r6, r3, #31 ; CHECK-NEXT: rsbs.w r1, r6, #-2147483648 -; CHECK-NEXT: vmov q3[2], q3[0], r4, r6 +; CHECK-NEXT: vmov q2[2], q2[0], r4, r6 ; CHECK-NEXT: sbcs.w r1, r2, r3 -; CHECK-NEXT: vmov q3[3], q3[1], r7, r3 +; CHECK-NEXT: vmov q2[3], q2[1], r7, r3 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r5, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: ldrd r5, r2, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: vpsel q3, q3, q0 -; CHECK-NEXT: vmov r1, r3, d6 +; CHECK-NEXT: vpsel q2, q2, q0 +; CHECK-NEXT: vmov r1, r3, d4 ; CHECK-NEXT: subs.w r1, r1, r8 ; CHECK-NEXT: sbcs r1, r3, #0 ; CHECK-NEXT: 
mov.w r3, #0 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r3, r1, #0, #8 -; CHECK-NEXT: vmov r1, r4, d7 +; CHECK-NEXT: vmov r1, r4, d5 ; CHECK-NEXT: subs.w r1, r1, r8 ; CHECK-NEXT: sbcs r1, r4, #0 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r3, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r3 -; CHECK-NEXT: vpsel q3, q3, q1 -; CHECK-NEXT: vmov.f32 s13, s14 -; CHECK-NEXT: vmov.f32 s14, s8 -; CHECK-NEXT: vmov.f32 s15, s10 -; CHECK-NEXT: vstrb.8 q3, [r2], #16 +; CHECK-NEXT: vpsel q2, q2, q1 +; CHECK-NEXT: vmov.f32 s9, s10 +; CHECK-NEXT: vmov.f32 s10, s16 +; CHECK-NEXT: vmov.f32 s11, s18 +; CHECK-NEXT: vstrb.8 q2, [r2], #16 ; CHECK-NEXT: le lr, .LBB1_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: ldrd r1, r3, [sp] @ 8-byte Folded Reload @@ -462,14 +462,14 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n ; CHECK-NEXT: vcmp.u32 cs, q1, q4 ; CHECK-NEXT: vstr p0, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q5, [r0], #16 -; CHECK-NEXT: vldrwt.u32 q6, [r1], #16 -; CHECK-NEXT: vmov.f32 s16, s22 -; CHECK-NEXT: vmov.f32 s18, s23 -; CHECK-NEXT: vmov.f32 s28, s26 -; CHECK-NEXT: vmov.f32 s30, s27 -; CHECK-NEXT: vmullb.s32 q0, q7, q4 -; CHECK-NEXT: vmov.f32 s22, s25 +; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 +; CHECK-NEXT: vldrwt.u32 q5, [r1], #16 +; CHECK-NEXT: vmov.f32 s24, s18 +; CHECK-NEXT: vmov.f32 s26, s19 +; CHECK-NEXT: vmov.f32 s28, s22 +; CHECK-NEXT: vmov.f32 s30, s23 +; CHECK-NEXT: vmullb.s32 q0, q7, q6 +; CHECK-NEXT: vmov.f32 s18, s21 ; CHECK-NEXT: vmov r10, r5, d0 ; CHECK-NEXT: asrl r10, r5, #31 ; CHECK-NEXT: rsbs.w r7, r10, #-2147483648 @@ -483,7 +483,7 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n ; CHECK-NEXT: sbcs.w r3, r12, r7 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r7 ; CHECK-NEXT: csetm r3, lt -; CHECK-NEXT: vmov r7, s22 +; CHECK-NEXT: vmov r7, s18 ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: vpsel q0, q0, q2 @@ -498,11 +498,11 @@ define 
arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 -; CHECK-NEXT: vmov r3, s20 +; CHECK-NEXT: vmov r3, s16 ; CHECK-NEXT: vmsr p0, r4 -; CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: vpsel q4, q0, q3 -; CHECK-NEXT: vmov.f32 s2, s21 +; CHECK-NEXT: vmov r4, s20 +; CHECK-NEXT: vpsel q6, q0, q3 +; CHECK-NEXT: vmov.f32 s2, s17 ; CHECK-NEXT: smull r10, r5, r4, r3 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: asrl r10, r5, #31 @@ -536,8 +536,8 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n ; CHECK-NEXT: vpsel q0, q0, q3 ; CHECK-NEXT: vldr p0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: vmov.f32 s1, s2 -; CHECK-NEXT: vmov.f32 s2, s16 -; CHECK-NEXT: vmov.f32 s3, s18 +; CHECK-NEXT: vmov.f32 s2, s24 +; CHECK-NEXT: vmov.f32 s3, s26 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r2], #16 ; CHECK-NEXT: le lr, .LBB2_2 @@ -778,18 +778,17 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: .LBB4_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vldrw.u32 q3, [r1], #16 -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s16, s14 -; CHECK-NEXT: vmov.f32 s10, s7 -; CHECK-NEXT: vmov.f32 s18, s15 -; CHECK-NEXT: vmullb.u32 q5, q4, q2 +; CHECK-NEXT: vldrw.u32 q2, [r1], #16 +; CHECK-NEXT: vmov.f32 s12, s6 +; CHECK-NEXT: vmov.f32 s16, s10 +; CHECK-NEXT: vmov.f32 s14, s7 +; CHECK-NEXT: vmov.f32 s18, s11 +; CHECK-NEXT: vmullb.u32 q5, q4, q3 ; CHECK-NEXT: vmov.f32 s6, s5 ; CHECK-NEXT: vmov r10, r5, d10 ; CHECK-NEXT: lsrl r10, r5, #31 -; CHECK-NEXT: vmov.f32 s14, s13 +; CHECK-NEXT: vmov.f32 s10, s9 ; CHECK-NEXT: subs.w r6, r10, #-1 -; CHECK-NEXT: vmullb.u32 q4, q3, q1 ; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: csetm r5, lo @@ -797,15 +796,16 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, 
ptr no ; CHECK-NEXT: vmov r4, r5, d11 ; CHECK-NEXT: lsrl r4, r5, #31 ; CHECK-NEXT: subs.w r7, r4, #-1 -; CHECK-NEXT: vmov q2[2], q2[0], r10, r4 +; CHECK-NEXT: vmov q3[2], q3[0], r10, r4 ; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r6, r5, #8, #8 +; CHECK-NEXT: vmsr p0, r6 +; CHECK-NEXT: vpsel q3, q3, q0 +; CHECK-NEXT: vmullb.u32 q4, q2, q1 ; CHECK-NEXT: vmov r10, r5, d8 ; CHECK-NEXT: lsrl r10, r5, #31 -; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: subs.w r6, r10, #-1 -; CHECK-NEXT: vpsel q2, q2, q0 ; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: csetm r5, lo @@ -820,8 +820,8 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: vpsel q1, q1, q0 ; CHECK-NEXT: vmov.f32 s5, s6 -; CHECK-NEXT: vmov.f32 s6, s8 -; CHECK-NEXT: vmov.f32 s7, s10 +; CHECK-NEXT: vmov.f32 s6, s12 +; CHECK-NEXT: vmov.f32 s7, s14 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 ; CHECK-NEXT: le lr, .LBB4_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block diff --git a/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll index 217caeebe63356..cebc0d9c0e172c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll @@ -190,12 +190,17 @@ entry: define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: sext32_0213_0ext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 -; CHECK-NEXT: vmullb.s32 q2, q0, q3 +; CHECK-NEXT: vmov.f32 s17, s4 ; CHECK-NEXT: vmov.f32 s0, s1 +; CHECK-NEXT: vmullb.s32 q2, q4, q3 ; CHECK-NEXT: vmov.f32 s2, s3 ; CHECK-NEXT: vmullb.s32 q1, q0, q3 ; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> @@ -210,12 +215,17 @@ entry: define arm_aapcs_vfpcc <4 x i64> 
@sext32_0ext_0213(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: sext32_0ext_0213: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 -; CHECK-NEXT: vmullb.s32 q2, q3, q0 +; CHECK-NEXT: vmov.f32 s17, s4 ; CHECK-NEXT: vmov.f32 s0, s1 +; CHECK-NEXT: vmullb.s32 q2, q3, q4 ; CHECK-NEXT: vmov.f32 s2, s3 ; CHECK-NEXT: vmullb.s32 q1, q3, q0 ; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> @@ -466,12 +476,17 @@ entry: define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: zext32_0213_0ext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 -; CHECK-NEXT: vmullb.u32 q2, q0, q3 +; CHECK-NEXT: vmov.f32 s17, s4 ; CHECK-NEXT: vmov.f32 s0, s1 +; CHECK-NEXT: vmullb.u32 q2, q4, q3 ; CHECK-NEXT: vmov.f32 s2, s3 ; CHECK-NEXT: vmullb.u32 q1, q0, q3 ; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> @@ -486,12 +501,17 @@ entry: define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: zext32_0ext_0213: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 -; CHECK-NEXT: vmullb.u32 q2, q3, q0 +; CHECK-NEXT: vmov.f32 s17, s4 ; CHECK-NEXT: vmov.f32 s0, s1 +; CHECK-NEXT: vmullb.u32 q2, q3, q4 ; CHECK-NEXT: vmov.f32 s2, s3 ; CHECK-NEXT: vmullb.u32 q1, q3, q0 ; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 
6f2bba84a6eccf..43589dc993dabb 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -131,6 +131,7 @@ ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Tile Register Pre-configure ; CHECK-NEXT: Detect Dead Lanes +; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions ; CHECK-NEXT: Remove unreachable machine basic blocks ; CHECK-NEXT: Live Variable Analysis diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index 43eaa72047e659..12366a35d6f1a2 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -132,7 +132,6 @@ static_library("LLVMRISCVCodeGen") { "RISCVOptWInstrs.cpp", "RISCVPostRAExpandPseudoInsts.cpp", "RISCVPushPopOptimizer.cpp", - "RISCVRVVInitUndef.cpp", "RISCVRedundantCopyElimination.cpp", "RISCVRegisterInfo.cpp", "RISCVSubtarget.cpp",