From 4dce0292beae9ef9a4cb0881006f31b8734f7512 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Tue, 23 Jul 2024 01:31:41 +0300 Subject: [PATCH 01/11] [Xtensa] Implement lowering Mul/Div/Shift/ROT/CTTZ/CTLZ/CTPOP operations. Implement lowering of the Mul/Div operations and also shift parts operations. Implement lowering of the bit manipulations, like ROT/SWAP/CTPOP/CTTZ/CTLZ. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 252 ++++++- llvm/lib/Target/Xtensa/XtensaISelLowering.h | 14 + llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 24 + .../Target/Xtensa/XtensaMachineFunctionInfo.h | 53 ++ llvm/lib/Target/Xtensa/XtensaOperators.td | 8 + .../lib/Target/Xtensa/XtensaTargetMachine.cpp | 7 + llvm/lib/Target/Xtensa/XtensaTargetMachine.h | 4 + llvm/test/CodeGen/Xtensa/bswap.ll | 413 ++++++++++++ llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll | 531 +++++++++++++++ llvm/test/CodeGen/Xtensa/div.ll | 491 ++++++++++++++ llvm/test/CodeGen/Xtensa/mul.ll | 636 ++++++++++++++++++ llvm/test/CodeGen/Xtensa/rotl-rotr.ll | 500 ++++++++++++++ llvm/test/CodeGen/Xtensa/shift.ll | 72 ++ 13 files changed, 3001 insertions(+), 4 deletions(-) create mode 100644 llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h create mode 100644 llvm/test/CodeGen/Xtensa/bswap.ll create mode 100644 llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll create mode 100644 llvm/test/CodeGen/Xtensa/div.ll create mode 100644 llvm/test/CodeGen/Xtensa/mul.ll create mode 100644 llvm/test/CodeGen/Xtensa/rotl-rotr.ll create mode 100644 llvm/test/CodeGen/Xtensa/shift.ll diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 80d01d662a221..8c30dbbad821e 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -13,6 +13,7 @@ #include "XtensaISelLowering.h" #include "XtensaConstantPoolValue.h" +#include "XtensaMachineFunctionInfo.h" #include "XtensaSubtarget.h" #include "XtensaTargetMachine.h" #include 
"llvm/CodeGen/CallingConvLower.h" @@ -21,6 +22,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -98,6 +100,32 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setCondCodeAction(ISD::SETUGT, MVT::i32, Expand); setCondCodeAction(ISD::SETULE, MVT::i32, Expand); + setOperationAction(ISD::MUL, MVT::i32, Custom); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); + + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTLZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + // Implement custom stack allocations setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); // Implement custom stack save and restore @@ -665,12 +693,30 @@ SDValue XtensaTargetLowering::getAddrPCRel(SDValue Op, SDValue 
XtensaTargetLowering::LowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); + auto C = const_cast(CP->getConstVal()); + auto T = const_cast(CP->getType()); SDValue Result; - if (!CP->isMachineConstantPoolEntry()) { - Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), - CP->getOffset()); + + // Do not use constant pool for aggregate or vector constant types, + // in such cases create global variable, for example to store table + // when we lower CTTZ operation. + if (T->isAggregateType() || T->isVectorTy()) { + auto AFI = DAG.getMachineFunction().getInfo(); + auto M = const_cast( + DAG.getMachineFunction().getFunction().getParent()); + auto GV = new GlobalVariable( + *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C, + Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + + Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + + Twine(AFI->createLabelUId())); + Result = DAG.getTargetConstantPool(GV, PtrVT, Align(4)); } else { - report_fatal_error("This constantpool type is not supported yet"); + if (!CP->isMachineConstantPoolEntry()) { + Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlign(), CP->getOffset()); + } else { + report_fatal_error("This constantpool type is not supported yet"); + } } return getAddrPCRel(Result, DAG); @@ -713,6 +759,131 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue XtensaTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = MVT::i32; + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + + // if Shamt - register size < 0: // Shamt < register size + // Lo = Lo << Shamt + // Hi = (Hi << Shamt) | (Lo >>u (register size - Shamt)) + // else: + // Lo = 0 + // Hi = Lo << (Shamt - register size) + + SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT); 
+ SDValue ShamtMinusRegisterSize = + DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); + + SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); + + SDValue HiTrue = DAG.getNode(XtensaISD::SRCL, DL, VT, Hi, Lo, Shamt); + + SDValue Zero = DAG.getConstant(0, DL, VT); + + SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusRegisterSize); + + SDValue Cond = DAG.getSetCC(DL, VT, ShamtMinusRegisterSize, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, LoTrue, Zero); + + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, HiTrue, HiFalse); + + return DAG.getMergeValues({Lo, Hi}, DL); +} + +SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG, + bool IsSRA) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + MVT VT = MVT::i32; + + // SRA expansion: + // if Shamt - register size < 0: // Shamt < register size + // Lo = (Lo >>u Shamt) | (Hi << u (register size - Shamt)) + // Hi = Hi >>s Shamt + // else: + // Lo = Hi >>s (Shamt - register size); + // Hi = Hi >>s (register size - 1) + // + // SRL expansion: + // if Shamt - register size < 0: // Shamt < register size + // Lo = (Lo >>u Shamt) | (Hi << u (register size - Shamt)) + // Hi = Hi >>u Shamt + // else: + // Lo = Hi >>u (Shamt - register size); + // Hi = 0; + + unsigned ShiftRightOp = IsSRA ? 
ISD::SRA : ISD::SRL; + + SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT); + SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT); + SDValue ShamtMinusRegisterSize = + DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); + + SDValue LoTrue = DAG.getNode(XtensaISD::SRCR, DL, VT, Hi, Lo, Shamt); + + SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); + + SDValue Zero = DAG.getConstant(0, DL, VT); + + SDValue LoFalse = + DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusRegisterSize); + + SDValue HiFalse; + + if (IsSRA) { + HiFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, RegisterSizeMinus1); + } else { + HiFalse = Zero; + } + + SDValue Cond = DAG.getSetCC(DL, VT, ShamtMinusRegisterSize, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, LoTrue, LoFalse); + + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, HiTrue, HiFalse); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, DL); +} + +SDValue XtensaTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op->getValueType(0); + SDLoc DL(Op); + + if (VT != MVT::i32) + return SDValue(); + + ConstantSDNode *C = dyn_cast(Op->getOperand(1)); + if (!C) + return SDValue(); + + int64_t MulAmt = C->getSExtValue(); + unsigned ShiftAmt = 0; + + switch (MulAmt) { + case 2: + ShiftAmt = 1; + break; + case 4: + ShiftAmt = 2; + break; + case 8: + ShiftAmt = 3; + break; + default: + return SDValue(); + } + + return DAG.getNode(ISD::SHL, DL, VT, Op->getOperand(0), + DAG.getConstant(ShiftAmt, DL, VT)); +} + SDValue XtensaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -728,6 +899,8 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, return LowerJumpTable(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(cast(Op), DAG); + case ISD::MUL: + return LowerMUL(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::STACKSAVE: @@ -736,6 +909,12 @@ SDValue 
XtensaTargetLowering::LowerOperation(SDValue Op, return LowerSTACKRESTORE(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::SHL_PARTS: + return LowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: + return LowerShiftRightParts(Op, DAG, true); + case ISD::SRL_PARTS: + return LowerShiftRightParts(Op, DAG, false); default: report_fatal_error("Unexpected node to lower"); } @@ -753,6 +932,10 @@ const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const { return "XtensaISD::RET"; case XtensaISD::SELECT_CC: return "XtensaISD::SELECT_CC"; + case XtensaISD::SRCL: + return "XtensaISD::SRCL"; + case XtensaISD::SRCR: + return "XtensaISD::SRCR"; } return nullptr; } @@ -827,9 +1010,70 @@ XtensaTargetLowering::emitSelectCC(MachineInstr &MI, MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + switch (MI.getOpcode()) { case Xtensa::SELECT: return emitSelectCC(MI, MBB); + case Xtensa::SHL_P: { + MachineOperand &R = MI.getOperand(0); + MachineOperand &S = MI.getOperand(1); + MachineOperand &SA = MI.getOperand(2); + + BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSL)).addReg(SA.getReg()); + BuildMI(*MBB, MI, DL, TII.get(Xtensa::SLL), R.getReg()).addReg(S.getReg()); + MI.eraseFromParent(); + return MBB; + } + case Xtensa::SRA_P: { + MachineOperand &R = MI.getOperand(0); + MachineOperand &T = MI.getOperand(1); + MachineOperand &SA = MI.getOperand(2); + + BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSR)).addReg(SA.getReg()); + BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRA), R.getReg()).addReg(T.getReg()); + MI.eraseFromParent(); + return MBB; + } + case Xtensa::SRL_P: { + MachineOperand &R = MI.getOperand(0); + MachineOperand &T = MI.getOperand(1); + MachineOperand &SA = MI.getOperand(2); + + BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSR)).addReg(SA.getReg()); + BuildMI(*MBB, MI, DL, 
TII.get(Xtensa::SRL), R.getReg()).addReg(T.getReg()); + MI.eraseFromParent(); + return MBB; + } + case Xtensa::SRCL_P: { + MachineOperand &R = MI.getOperand(0); + MachineOperand &HI = MI.getOperand(1); + MachineOperand &LO = MI.getOperand(2); + MachineOperand &SA = MI.getOperand(3); + + BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSL)).addReg(SA.getReg()); + BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRC), R.getReg()) + .addReg(HI.getReg()) + .addReg(LO.getReg()); + ; + MI.eraseFromParent(); + return MBB; + } + case Xtensa::SRCR_P: { + MachineOperand &R = MI.getOperand(0); + MachineOperand &HI = MI.getOperand(1); + MachineOperand &LO = MI.getOperand(2); + MachineOperand &SA = MI.getOperand(3); + + BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSR)).addReg(SA.getReg()); + BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRC), R.getReg()) + .addReg(HI.getReg()) + .addReg(LO.getReg()); + ; + MI.eraseFromParent(); + return MBB; + } default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index dd811ae9f3a77..b4c4929922cbf 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -40,6 +40,10 @@ enum { // the lhs and rhs (ops #0 and #1) of a conditional expression with the // condition code in op #4 SELECT_CC, + + // Shift + SRCL, + SRCR, }; } @@ -50,6 +54,10 @@ class XtensaTargetLowering : public TargetLowering { explicit XtensaTargetLowering(const TargetMachine &TM, const XtensaSubtarget &STI); + MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override { + return LHSTy.getSizeInBits() <= 32 ? 
MVT::i32 : MVT::i64; + } + EVT getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const override { if (!VT.isVector()) @@ -103,6 +111,8 @@ class XtensaTargetLowering : public TargetLowering { SDValue LowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; @@ -111,6 +121,10 @@ class XtensaTargetLowering : public TargetLowering { SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; + SDValue getAddrPCRel(SDValue Op, SelectionDAG &DAG) const; CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td index fc134e794153b..18a31fef18446 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td @@ -189,6 +189,30 @@ def SSAI : RRR_Inst<0x00, 0x00, 0x04, (outs), (ins uimm5:$imm), let t{0} = imm{4}; } +// Shift Pseudo instructions: +// SSL/SSR + Shift combination +let usesCustomInserter = 1 in { + def SHL_P : Pseudo<(outs AR:$r), (ins AR:$s, AR:$sa), + "# SHL_P $r, $s, $sa", + [(set i32:$r, (shl i32:$s, i32:$sa))]>; + + def SRA_P : Pseudo<(outs AR:$r), (ins AR:$t, AR:$sa), + "# SRA_P $r, $t, $sa", + [(set i32:$r, (sra i32:$t, i32:$sa))]>; + + def SRL_P : Pseudo<(outs AR:$r), (ins AR:$t, AR:$sa), + "# SRL_P $r, $t, $sa", + [(set i32:$r, (srl i32:$t, i32:$sa))]>; + + def SRCL_P : Pseudo<(outs AR:$r), (ins AR:$hi, AR:$lo, AR:$sa), + "# SRCL_P $r, $hi, $lo, $sa", + [(set i32:$r, (Xtensa_srcl i32:$hi, i32:$lo, i32:$sa))]>; + + def SRCR_P : Pseudo<(outs AR:$r), (ins AR:$hi, AR:$lo, AR:$sa), + "# SRCR_P $r, $hi, $lo, $sa", + [(set i32:$r, (Xtensa_srcr 
i32:$hi, i32:$lo, i32:$sa))]>; +} + //===----------------------------------------------------------------------===// // Load and store instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h new file mode 100644 index 0000000000000..86ee81128c34c --- /dev/null +++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h @@ -0,0 +1,53 @@ +//==- XtensaMachineFunctionInfo.h - Xtensa machine function info --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares Xtensa-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class XtensaFunctionInfo : public MachineFunctionInfo { + unsigned VarArgsFirstGPR; + int VarArgsStackOffset; + unsigned VarArgsFrameIndex; + bool SaveFrameRegister = false; + unsigned LabelUId = 0; + +public: + explicit XtensaFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) + : VarArgsFirstGPR(0), VarArgsStackOffset(0), VarArgsFrameIndex(0) {} + + unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } + void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } + + int getVarArgsStackOffset() const { return VarArgsStackOffset; } + void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; } + + // Get and 
set the frame index of the first stack vararg. + unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } + + bool isSaveFrameRegister() const { return SaveFrameRegister; } + void setSaveFrameRegister() { SaveFrameRegister = true; } + + unsigned createLabelUId() { return LabelUId++; } +}; + +} // namespace llvm + +#endif /* LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H */ diff --git a/llvm/lib/Target/Xtensa/XtensaOperators.td b/llvm/lib/Target/Xtensa/XtensaOperators.td index 93cd1c933dbde..c825359f3c5dd 100644 --- a/llvm/lib/Target/Xtensa/XtensaOperators.td +++ b/llvm/lib/Target/Xtensa/XtensaOperators.td @@ -24,6 +24,10 @@ def SDT_XtensaSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>, SDTCisSameAs<2, 3>, SDTCisVT<5, i32>]>; + +def SDT_XtensaSRC : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + //===----------------------------------------------------------------------===// // Node definitions //===----------------------------------------------------------------------===// @@ -46,3 +50,7 @@ def Xtensa_brjt: SDNode<"XtensaISD::BR_JT", SDT_XtensaBrJT, [SDNPHasChain]>; def Xtensa_select_cc: SDNode<"XtensaISD::SELECT_CC", SDT_XtensaSelectCC, [SDNPInGlue]>; + +def Xtensa_srcl: SDNode<"XtensaISD::SRCL", SDT_XtensaSRC>; + +def Xtensa_srcr: SDNode<"XtensaISD::SRCR", SDT_XtensaSRC>; diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp index 49c7faf84df1d..eba169a2fe7a9 100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp @@ -14,6 +14,7 @@ #include "XtensaTargetMachine.h" #include "TargetInfo/XtensaTargetInfo.h" +#include "XtensaMachineFunctionInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -83,6 +84,12 @@ 
XtensaTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } +MachineFunctionInfo *XtensaTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return XtensaFunctionInfo::create(Allocator, F, STI); +} + namespace { /// Xtensa Code Generator Pass Configuration Options. class XtensaPassConfig : public TargetPassConfig { diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h index f371f22ed3d0e..6975076b5d699 100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h @@ -45,6 +45,10 @@ class XtensaTargetMachine : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + protected: mutable StringMap> SubtargetMap; }; diff --git a/llvm/test/CodeGen/Xtensa/bswap.ll b/llvm/test/CodeGen/Xtensa/bswap.ll new file mode 100644 index 0000000000000..e4458c7cf81c3 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/bswap.ll @@ -0,0 +1,413 @@ +; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) +declare i8 @llvm.bitreverse.i8(i8) +declare i16 @llvm.bitreverse.i16(i16) +declare i32 @llvm.bitreverse.i32(i32) +declare i64 @llvm.bitreverse.i64(i64) + +define i16 @test_bswap_i16(i16 %a) nounwind { +; XTENSA-LABEL: test_bswap_i16: +; XTENSA: l32r a8, .LCPI0_0 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: srli a8, a8, 8 +; XTENSA-NEXT: slli a9, a2, 8 +; XTENSA-NEXT: or a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i16 @llvm.bswap.i16(i16 %a) + ret i16 %tmp +} + +define i32 @test_bswap_i32(i32 %a) nounwind { +; XTENSA-LABEL: test_bswap_i32: +; XTENSA: movi a8, 24 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl 
a8, a2 +; XTENSA-NEXT: srli a9, a2, 8 +; XTENSA-NEXT: l32r a10, .LCPI1_0 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: and a9, a2, a10 +; XTENSA-NEXT: slli a9, a9, 8 +; XTENSA-NEXT: slli a10, a2, 24 +; XTENSA-NEXT: or a9, a10, a9 +; XTENSA-NEXT: or a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %tmp +} + +define i64 @test_bswap_i64(i64 %a) nounwind { +; XTENSA-LABEL: test_bswap_i64: +; XTENSA: movi a9, 24 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a8, a3 +; XTENSA-NEXT: srli a10, a3, 8 +; XTENSA-NEXT: l32r a11, .LCPI2_0 +; XTENSA-NEXT: and a10, a10, a11 +; XTENSA-NEXT: or a8, a10, a8 +; XTENSA-NEXT: and a10, a3, a11 +; XTENSA-NEXT: slli a10, a10, 8 +; XTENSA-NEXT: slli a7, a3, 24 +; XTENSA-NEXT: or a10, a7, a10 +; XTENSA-NEXT: or a8, a10, a8 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a9, a2 +; XTENSA-NEXT: srli a10, a2, 8 +; XTENSA-NEXT: and a10, a10, a11 +; XTENSA-NEXT: or a9, a10, a9 +; XTENSA-NEXT: and a10, a2, a11 +; XTENSA-NEXT: slli a10, a10, 8 +; XTENSA-NEXT: slli a11, a2, 24 +; XTENSA-NEXT: or a10, a11, a10 +; XTENSA-NEXT: or a3, a10, a9 +; XTENSA-NEXT: or a2, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %tmp +} + +define i8 @test_bitreverse_i8(i8 %a) nounwind { +; XTENSA-LABEL: test_bitreverse_i8: +; XTENSA: movi a8, 15 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: slli a8, a8, 4 +; XTENSA-NEXT: movi a9, 240 +; XTENSA-NEXT: and a9, a2, a9 +; XTENSA-NEXT: srli a9, a9, 4 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: movi a10, 51 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: movi a10, 85 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i8 @llvm.bitreverse.i8(i8 %a) + ret i8 %tmp +} + +define i16 
@test_bitreverse_i16(i16 %a) nounwind { +; XTENSA-LABEL: test_bitreverse_i16: +; XTENSA: l32r a8, .LCPI4_0 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: srli a8, a8, 8 +; XTENSA-NEXT: slli a9, a2, 8 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: l32r a10, .LCPI4_1 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 4 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: l32r a10, .LCPI4_2 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI4_3 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i16 @llvm.bitreverse.i16(i16 %a) + ret i16 %tmp +} + +define i32 @test_bitreverse_i32(i32 %a) nounwind { +; XTENSA-LABEL: test_bitreverse_i32: +; XTENSA: movi a8, 24 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: srli a9, a2, 8 +; XTENSA-NEXT: l32r a10, .LCPI5_0 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: and a9, a2, a10 +; XTENSA-NEXT: slli a9, a9, 8 +; XTENSA-NEXT: slli a10, a2, 24 +; XTENSA-NEXT: or a9, a10, a9 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: l32r a10, .LCPI5_1 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 4 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: l32r a10, .LCPI5_2 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI5_3 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i32 @llvm.bitreverse.i32(i32 %a) + ret i32 %tmp +} + 
+define i64 @test_bitreverse_i64(i64 %a) nounwind { +; XTENSA-LABEL: test_bitreverse_i64: +; XTENSA: movi a10, 24 +; XTENSA-NEXT: ssr a10 +; XTENSA-NEXT: srl a8, a3 +; XTENSA-NEXT: srli a11, a3, 8 +; XTENSA-NEXT: l32r a9, .LCPI6_0 +; XTENSA-NEXT: and a11, a11, a9 +; XTENSA-NEXT: or a8, a11, a8 +; XTENSA-NEXT: and a11, a3, a9 +; XTENSA-NEXT: slli a11, a11, 8 +; XTENSA-NEXT: slli a7, a3, 24 +; XTENSA-NEXT: or a11, a7, a11 +; XTENSA-NEXT: or a8, a11, a8 +; XTENSA-NEXT: srli a7, a8, 4 +; XTENSA-NEXT: l32r a11, .LCPI6_1 +; XTENSA-NEXT: and a7, a7, a11 +; XTENSA-NEXT: and a8, a8, a11 +; XTENSA-NEXT: slli a8, a8, 4 +; XTENSA-NEXT: or a8, a7, a8 +; XTENSA-NEXT: srli a7, a8, 2 +; XTENSA-NEXT: l32r a6, .LCPI6_2 +; XTENSA-NEXT: and a7, a7, a6 +; XTENSA-NEXT: and a8, a8, a6 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a7, a8 +; XTENSA-NEXT: srli a7, a8, 1 +; XTENSA-NEXT: l32r a5, .LCPI6_3 +; XTENSA-NEXT: and a7, a7, a5 +; XTENSA-NEXT: and a8, a8, a5 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a8, a7, a8 +; XTENSA-NEXT: ssr a10 +; XTENSA-NEXT: srl a10, a2 +; XTENSA-NEXT: srli a7, a2, 8 +; XTENSA-NEXT: and a7, a7, a9 +; XTENSA-NEXT: or a10, a7, a10 +; XTENSA-NEXT: and a9, a2, a9 +; XTENSA-NEXT: slli a9, a9, 8 +; XTENSA-NEXT: slli a7, a2, 24 +; XTENSA-NEXT: or a9, a7, a9 +; XTENSA-NEXT: or a9, a9, a10 +; XTENSA-NEXT: srli a10, a9, 4 +; XTENSA-NEXT: and a10, a10, a11 +; XTENSA-NEXT: and a9, a9, a11 +; XTENSA-NEXT: slli a9, a9, 4 +; XTENSA-NEXT: or a9, a10, a9 +; XTENSA-NEXT: srli a10, a9, 2 +; XTENSA-NEXT: and a10, a10, a6 +; XTENSA-NEXT: and a9, a9, a6 +; XTENSA-NEXT: slli a9, a9, 2 +; XTENSA-NEXT: or a9, a10, a9 +; XTENSA-NEXT: srli a10, a9, 1 +; XTENSA-NEXT: and a10, a10, a5 +; XTENSA-NEXT: and a9, a9, a5 +; XTENSA-NEXT: slli a9, a9, 1 +; XTENSA-NEXT: or a3, a10, a9 +; XTENSA-NEXT: or a2, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i64 @llvm.bitreverse.i64(i64 %a) + ret i64 %tmp +} + +define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind { +; XTENSA-LABEL: 
test_bswap_bitreverse_i16: +; XTENSA: srli a8, a2, 4 +; XTENSA-NEXT: l32r a9, .LCPI7_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: and a9, a2, a9 +; XTENSA-NEXT: slli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: l32r a10, .LCPI7_1 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI7_2 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i16 @llvm.bswap.i16(i16 %a) + %tmp2 = call i16 @llvm.bitreverse.i16(i16 %tmp) + ret i16 %tmp2 +} + +define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind { +; XTENSA-LABEL: test_bswap_bitreverse_i32: +; XTENSA: srli a8, a2, 4 +; XTENSA-NEXT: l32r a9, .LCPI8_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: and a9, a2, a9 +; XTENSA-NEXT: slli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: l32r a10, .LCPI8_1 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI8_2 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i32 @llvm.bswap.i32(i32 %a) + %tmp2 = call i32 @llvm.bitreverse.i32(i32 %tmp) + ret i32 %tmp2 +} + +define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind { +; XTENSA-LABEL: test_bswap_bitreverse_i64: +; XTENSA: srli a8, a2, 4 +; XTENSA-NEXT: l32r a9, .LCPI9_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: and a10, a2, a9 +; XTENSA-NEXT: slli a10, a10, 4 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: srli a10, a8, 2 +; XTENSA-NEXT: l32r a11, .LCPI9_1 +; XTENSA-NEXT: and a10, a10, a11 +; XTENSA-NEXT: and a8, a8, a11 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or 
a8, a10, a8 +; XTENSA-NEXT: srli a10, a8, 1 +; XTENSA-NEXT: l32r a7, .LCPI9_2 +; XTENSA-NEXT: and a10, a10, a7 +; XTENSA-NEXT: and a8, a8, a7 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a2, a10, a8 +; XTENSA-NEXT: srli a8, a3, 4 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: and a9, a3, a9 +; XTENSA-NEXT: slli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: and a9, a9, a11 +; XTENSA-NEXT: and a8, a8, a11 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: and a9, a9, a7 +; XTENSA-NEXT: and a8, a8, a7 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a3, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i64 @llvm.bswap.i64(i64 %a) + %tmp2 = call i64 @llvm.bitreverse.i64(i64 %tmp) + ret i64 %tmp2 +} + +define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind { +; XTENSA-LABEL: test_bitreverse_bswap_i16: +; XTENSA: srli a8, a2, 4 +; XTENSA-NEXT: l32r a9, .LCPI10_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: and a9, a2, a9 +; XTENSA-NEXT: slli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: l32r a10, .LCPI10_1 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI10_2 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i16 @llvm.bitreverse.i16(i16 %a) + %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp) + ret i16 %tmp2 +} + +define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind { +; XTENSA-LABEL: test_bitreverse_bswap_i32: +; XTENSA: srli a8, a2, 4 +; XTENSA-NEXT: l32r a9, .LCPI11_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: and a9, a2, a9 +; XTENSA-NEXT: slli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: l32r a10, .LCPI11_1 +; XTENSA-NEXT: and a9, a9, a10 +; 
XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI11_2 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i32 @llvm.bitreverse.i32(i32 %a) + %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp) + ret i32 %tmp2 +} + +define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind { +; XTENSA-LABEL: test_bitreverse_bswap_i64: +; XTENSA: srli a8, a2, 4 +; XTENSA-NEXT: l32r a9, .LCPI12_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: and a10, a2, a9 +; XTENSA-NEXT: slli a10, a10, 4 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: srli a10, a8, 2 +; XTENSA-NEXT: l32r a11, .LCPI12_1 +; XTENSA-NEXT: and a10, a10, a11 +; XTENSA-NEXT: and a8, a8, a11 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a10, a8 +; XTENSA-NEXT: srli a10, a8, 1 +; XTENSA-NEXT: l32r a7, .LCPI12_2 +; XTENSA-NEXT: and a10, a10, a7 +; XTENSA-NEXT: and a8, a8, a7 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a2, a10, a8 +; XTENSA-NEXT: srli a8, a3, 4 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: and a9, a3, a9 +; XTENSA-NEXT: slli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: and a9, a9, a11 +; XTENSA-NEXT: and a8, a8, a11 +; XTENSA-NEXT: slli a8, a8, 2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: and a9, a9, a7 +; XTENSA-NEXT: and a8, a8, a7 +; XTENSA-NEXT: slli a8, a8, 1 +; XTENSA-NEXT: or a3, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i64 @llvm.bitreverse.i64(i64 %a) + %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp) + ret i64 %tmp2 +} diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll new file mode 100644 index 0000000000000..030f2a0fbfdc7 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll @@ -0,0 +1,531 @@ +; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ +; RUN: | 
FileCheck -check-prefix=XTENSA %s + +declare i8 @llvm.cttz.i8(i8, i1) +declare i16 @llvm.cttz.i16(i16, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i64 @llvm.cttz.i64(i64, i1) +declare i8 @llvm.ctlz.i8(i8, i1) +declare i16 @llvm.ctlz.i16(i16, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i64 @llvm.ctlz.i64(i64, i1) +declare i8 @llvm.ctpop.i8(i8) +declare i16 @llvm.ctpop.i16(i16) +declare i32 @llvm.ctpop.i32(i32) +declare i64 @llvm.ctpop.i64(i64) + +define i8 @test_cttz_i8(i8 %a) nounwind { +; XTENSA-LABEL: test_cttz_i8: +; XTENSA: movi a8, 255 +; XTENSA-NEXT: and a9, a2, a8 +; XTENSA-NEXT: movi a8, 8 +; XTENSA-NEXT: beqz a9, .LBB0_2 +; XTENSA-NEXT: j .LBB0_1 +; XTENSA-NEXT: .LBB0_1: # %cond.false +; XTENSA-NEXT: movi a8, -1 +; XTENSA-NEXT: xor a8, a2, a8 +; XTENSA-NEXT: addi a9, a2, -1 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: movi a10, 85 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: movi a9, 51 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: .LBB0_2: # %cond.end +; XTENSA-NEXT: or a2, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false) + ret i8 %tmp +} + +define i16 @test_cttz_i16(i16 %a) nounwind { +; XTENSA-LABEL: test_cttz_i16: +; XTENSA: l32r a8, .LCPI1_0 +; XTENSA-NEXT: and a9, a2, a8 +; XTENSA-NEXT: movi a8, 16 +; XTENSA-NEXT: beqz a9, .LBB1_2 +; XTENSA-NEXT: j .LBB1_1 +; XTENSA-NEXT: .LBB1_1: # %cond.false +; XTENSA-NEXT: movi a8, -1 +; XTENSA-NEXT: xor a8, a2, a8 +; XTENSA-NEXT: addi a9, a2, -1 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI1_1 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI1_2 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 
2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: l32r a10, .LCPI1_3 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: srli a8, a8, 8 +; XTENSA-NEXT: add a8, a9, a8 +; XTENSA-NEXT: .LBB1_2: # %cond.end +; XTENSA-NEXT: or a2, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false) + ret i16 %tmp +} + +define i32 @test_cttz_i32(i32 %a) nounwind { +; XTENSA-LABEL: test_cttz_i32: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a8, a2, a2 +; XTENSA-NEXT: movi a2, 32 +; XTENSA-NEXT: beqz a8, .LBB2_2 +; XTENSA-NEXT: j .LBB2_1 +; XTENSA-NEXT: .LBB2_1: # %cond.false +; XTENSA-NEXT: neg a9, a8 +; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: l32r a3, .LCPI2_0 +; XTENSA-NEXT: l32r a8, .LCPI2_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: movi a8, 27 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: l32r a9, .LCPI2_2 +; XTENSA-NEXT: add a8, a9, a8 +; XTENSA-NEXT: l8ui a2, a8, 0 +; XTENSA-NEXT: .LBB2_2: # %cond.end +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %tmp +} + +define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind { +; XTENSA-LABEL: test_cttz_i8_zero_undef: +; XTENSA: movi a8, -1 +; XTENSA-NEXT: xor a8, a2, a8 +; XTENSA-NEXT: addi a9, a2, -1 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: movi a10, 85 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: movi a9, 51 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a2, a8, a9 
+; XTENSA-NEXT: ret + %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true) + ret i8 %tmp +} + +define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind { +; XTENSA-LABEL: test_cttz_i16_zero_undef: +; XTENSA: movi a8, -1 +; XTENSA-NEXT: xor a8, a2, a8 +; XTENSA-NEXT: addi a9, a2, -1 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI4_0 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI4_1 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: l32r a10, .LCPI4_2 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: srli a8, a8, 8 +; XTENSA-NEXT: add a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true) + ret i16 %tmp +} + +define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { +; XTENSA-LABEL: test_cttz_i32_zero_undef: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: neg a8, a2 +; XTENSA-NEXT: and a2, a2, a8 +; XTENSA-NEXT: l32r a3, .LCPI5_0 +; XTENSA-NEXT: l32r a8, .LCPI5_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: movi a8, 27 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: l32r a9, .LCPI5_2 +; XTENSA-NEXT: add a8, a9, a8 +; XTENSA-NEXT: l8ui a2, a8, 0 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true) + ret i32 %tmp +} + +define i8 @test_ctlz_i8(i8 %a) nounwind { +; XTENSA-LABEL: test_ctlz_i8: +; XTENSA: movi a8, 255 +; XTENSA-NEXT: and a9, a2, a8 +; XTENSA-NEXT: movi a8, 8 +; XTENSA-NEXT: beqz a9, .LBB6_2 +; XTENSA-NEXT: j .LBB6_1 +; XTENSA-NEXT: .LBB6_1: # %cond.false +; XTENSA-NEXT: movi a8, 254 +; XTENSA-NEXT: and a8, a2, a8 +; 
XTENSA-NEXT: srli a8, a8, 1 +; XTENSA-NEXT: or a8, a2, a8 +; XTENSA-NEXT: movi a9, 252 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 2 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, 240 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, -1 +; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: movi a10, 85 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: movi a9, 51 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: .LBB6_2: # %cond.end +; XTENSA-NEXT: or a2, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 false) + ret i8 %tmp +} + +define i16 @test_ctlz_i16(i16 %a) nounwind { +; XTENSA-LABEL: test_ctlz_i16: +; XTENSA: l32r a8, .LCPI7_0 +; XTENSA-NEXT: and a9, a2, a8 +; XTENSA-NEXT: movi a8, 16 +; XTENSA-NEXT: beqz a9, .LBB7_2 +; XTENSA-NEXT: j .LBB7_1 +; XTENSA-NEXT: .LBB7_1: # %cond.false +; XTENSA-NEXT: l32r a8, .LCPI7_1 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: srli a8, a8, 1 +; XTENSA-NEXT: or a8, a2, a8 +; XTENSA-NEXT: l32r a9, .LCPI7_2 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 2 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI7_3 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI7_4 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 8 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, -1 +; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI7_5 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI7_6 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; 
XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: l32r a10, .LCPI7_7 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: srli a8, a8, 8 +; XTENSA-NEXT: add a8, a9, a8 +; XTENSA-NEXT: .LBB7_2: # %cond.end +; XTENSA-NEXT: or a2, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 false) + ret i16 %tmp +} + +define i32 @test_ctlz_i32(i32 %a) nounwind { +; XTENSA-LABEL: test_ctlz_i32: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a8, a2, a2 +; XTENSA-NEXT: movi a2, 32 +; XTENSA-NEXT: beqz a8, .LBB8_2 +; XTENSA-NEXT: j .LBB8_1 +; XTENSA-NEXT: .LBB8_1: # %cond.false +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, 16 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a9, a8 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, -1 +; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI8_0 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI8_1 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI8_2 +; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: l32r a3, .LCPI8_3 +; XTENSA-NEXT: l32r a8, .LCPI8_4 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: movi a8, 24 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a2, a2 +; XTENSA-NEXT: .LBB8_2: # %cond.end +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 
false) + ret i32 %tmp +} + +define i8 @test_ctlz_i8_zero_undef(i8 %a) nounwind { +; XTENSA-LABEL: test_ctlz_i8_zero_undef: +; XTENSA: movi a8, 254 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: srli a8, a8, 1 +; XTENSA-NEXT: or a8, a2, a8 +; XTENSA-NEXT: movi a9, 252 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 2 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, 240 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, -1 +; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: movi a10, 85 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: movi a9, 51 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: ret + %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 true) + ret i8 %tmp +} + +define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind { +; XTENSA-LABEL: test_ctlz_i16_zero_undef: +; XTENSA: l32r a8, .LCPI10_0 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: srli a8, a8, 1 +; XTENSA-NEXT: or a8, a2, a8 +; XTENSA-NEXT: l32r a9, .LCPI10_1 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 2 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI10_2 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI10_3 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: srli a9, a9, 8 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, -1 +; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI10_4 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI10_5 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli 
a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: l32r a10, .LCPI10_6 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: srli a8, a8, 8 +; XTENSA-NEXT: add a2, a9, a8 +; XTENSA-NEXT: ret + %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 true) + ret i16 %tmp +} + +define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { +; XTENSA-LABEL: test_ctlz_i32_zero_undef: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: srli a8, a2, 1 +; XTENSA-NEXT: or a8, a2, a8 +; XTENSA-NEXT: srli a9, a8, 2 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, 16 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a9, a8 +; XTENSA-NEXT: or a8, a8, a9 +; XTENSA-NEXT: movi a9, -1 +; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI11_0 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI11_1 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI11_2 +; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: l32r a3, .LCPI11_3 +; XTENSA-NEXT: l32r a8, .LCPI11_4 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: movi a8, 24 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a2, a2 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 true) + ret i32 %tmp +} + +define i8 @test_ctpop_i8(i8 %a) nounwind { +; XTENSA-LABEL: test_ctpop_i8: +; XTENSA: srli a8, a2, 1 +; XTENSA-NEXT: movi a9, 85 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: sub a8, a2, a8 +; XTENSA-NEXT: movi a9, 51 +; XTENSA-NEXT: and a10, a8, a9 +; 
XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: ret + %1 = call i8 @llvm.ctpop.i8(i8 %a) + ret i8 %1 +} + +define i16 @test_ctpop_i16(i16 %a) nounwind { +; XTENSA-LABEL: test_ctpop_i16: +; XTENSA: srli a8, a2, 1 +; XTENSA-NEXT: l32r a9, .LCPI13_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: sub a8, a2, a8 +; XTENSA-NEXT: l32r a9, .LCPI13_1 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a9, a8, a9 +; XTENSA-NEXT: l32r a10, .LCPI13_2 +; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: srli a8, a8, 8 +; XTENSA-NEXT: add a2, a9, a8 +; XTENSA-NEXT: ret + %1 = call i16 @llvm.ctpop.i16(i16 %a) + ret i16 %1 +} + +define i32 @test_ctpop_i32(i32 %a) nounwind { +; XTENSA-LABEL: test_ctpop_i32: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: srli a8, a2, 1 +; XTENSA-NEXT: l32r a9, .LCPI14_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: sub a8, a2, a8 +; XTENSA-NEXT: l32r a9, .LCPI14_1 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI14_2 +; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: l32r a3, .LCPI14_3 +; XTENSA-NEXT: l32r a8, .LCPI14_4 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: movi a8, 24 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a2, a2 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %1 +} diff --git a/llvm/test/CodeGen/Xtensa/div.ll 
b/llvm/test/CodeGen/Xtensa/div.ll new file mode 100644 index 0000000000000..fcb58eb5bff53 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/div.ll @@ -0,0 +1,491 @@ +; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +define i32 @udiv(i32 %a, i32 %b) nounwind { +; XTENSA-LABEL: udiv: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI0_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i32 %a, %b + ret i32 %1 +} + +define i32 @udiv_constant(i32 %a) nounwind { +; XTENSA-LABEL: udiv_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a3, 5 +; XTENSA-NEXT: l32r a8, .LCPI1_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i32 %a, 5 + ret i32 %1 +} + +define i32 @udiv_pow2(i32 %a) nounwind { +; XTENSA-LABEL: udiv_pow2: +; XTENSA: srli a2, a2, 3 +; XTENSA-NEXT: ret + %1 = udiv i32 %a, 8 + ret i32 %1 +} + +define i32 @udiv_constant_lhs(i32 %a) nounwind { +; XTENSA-LABEL: udiv_constant_lhs: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a3, a2, a2 +; XTENSA-NEXT: movi a2, 10 +; XTENSA-NEXT: l32r a8, .LCPI3_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i32 10, %a + ret i32 %1 +} + +define i64 @udiv64(i64 %a, i64 %b) nounwind { +; XTENSA-LABEL: udiv64: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI4_0 +; 
XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i64 %a, %b + ret i64 %1 +} + +define i64 @udiv64_constant(i64 %a) nounwind { +; XTENSA-LABEL: udiv64_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a4, 5 +; XTENSA-NEXT: movi a5, 0 +; XTENSA-NEXT: l32r a8, .LCPI5_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i64 %a, 5 + ret i64 %1 +} + +define i64 @udiv64_constant_lhs(i64 %a) nounwind { +; XTENSA-LABEL: udiv64_constant_lhs: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a5, a3, a3 +; XTENSA-NEXT: or a4, a2, a2 +; XTENSA-NEXT: movi a2, 10 +; XTENSA-NEXT: movi a3, 0 +; XTENSA-NEXT: l32r a8, .LCPI6_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i64 10, %a + ret i64 %1 +} + +define i8 @udiv8(i8 %a, i8 %b) nounwind { +; XTENSA-LABEL: udiv8: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a8, 255 +; XTENSA-NEXT: and a2, a2, a8 +; XTENSA-NEXT: and a3, a3, a8 +; XTENSA-NEXT: l32r a8, .LCPI7_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i8 %a, %b + ret i8 %1 +} + +define i8 @udiv8_constant(i8 %a) nounwind { +; XTENSA-LABEL: udiv8_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a8, 255 +; XTENSA-NEXT: and a2, a2, a8 +; 
XTENSA-NEXT: movi a3, 5 +; XTENSA-NEXT: l32r a8, .LCPI8_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i8 %a, 5 + ret i8 %1 +} + +define i8 @udiv8_pow2(i8 %a) nounwind { +; XTENSA-LABEL: udiv8_pow2: +; XTENSA: movi a8, 248 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: srli a2, a8, 3 +; XTENSA-NEXT: ret + %1 = udiv i8 %a, 8 + ret i8 %1 +} + +define i8 @udiv8_constant_lhs(i8 %a) nounwind { +; XTENSA-LABEL: udiv8_constant_lhs: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a8, 255 +; XTENSA-NEXT: and a3, a2, a8 +; XTENSA-NEXT: movi a2, 10 +; XTENSA-NEXT: l32r a8, .LCPI10_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i8 10, %a + ret i8 %1 +} + +define i16 @udiv16(i16 %a, i16 %b) nounwind { +; XTENSA-LABEL: udiv16: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI11_0 +; XTENSA-NEXT: and a2, a2, a8 +; XTENSA-NEXT: and a3, a3, a8 +; XTENSA-NEXT: l32r a8, .LCPI11_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = udiv i16 %a, %b + ret i16 %1 +} + +define i16 @udiv16_constant(i16 %a) nounwind { +; XTENSA-LABEL: udiv16_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI12_0 +; XTENSA-NEXT: and a2, a2, a8 +; XTENSA-NEXT: movi a3, 5 +; XTENSA-NEXT: l32r a8, .LCPI12_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 
= udiv i16 %a, 5 + ret i16 %1 +} + +define i16 @udiv16_pow2(i16 %a) nounwind { +; XTENSA-LABEL: udiv16_pow2: +; XTENSA: l32r a8, .LCPI13_0 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: srli a2, a8, 3 +; XTENSA-NEXT: ret + %1 = udiv i16 %a, 8 + ret i16 %1 +} + +define i32 @sdiv(i32 %a, i32 %b) nounwind { +; XTENSA-LABEL: sdiv: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI14_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i32 %a, %b + ret i32 %1 +} + +define i32 @sdiv_constant_lhs(i32 %a) nounwind { +; XTENSA-LABEL: sdiv_constant_lhs: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a3, a2, a2 +; XTENSA-NEXT: movi a2, -10 +; XTENSA-NEXT: l32r a8, .LCPI15_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i32 -10, %a + ret i32 %1 +} + +define i64 @sdiv64(i64 %a, i64 %b) nounwind { +; XTENSA-LABEL: sdiv64: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI16_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i64 %a, %b + ret i64 %1 +} + +define i64 @sdiv64_constant(i64 %a) nounwind { +; XTENSA-LABEL: sdiv64_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a4, 5 +; XTENSA-NEXT: movi a5, 0 +; XTENSA-NEXT: l32r a8, .LCPI17_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; 
XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i64 %a, 5 + ret i64 %1 +} + +define i64 @sdiv64_constant_lhs(i64 %a) nounwind { +; XTENSA-LABEL: sdiv64_constant_lhs: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a5, a3, a3 +; XTENSA-NEXT: or a4, a2, a2 +; XTENSA-NEXT: movi a2, 10 +; XTENSA-NEXT: movi a3, 0 +; XTENSA-NEXT: l32r a8, .LCPI18_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i64 10, %a + ret i64 %1 +} + + +define i64 @sdiv64_sext_operands(i32 %a, i32 %b) nounwind { +; XTENSA-LABEL: sdiv64_sext_operands: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a3, a3 +; XTENSA-NEXT: srai a3, a2, 31 +; XTENSA-NEXT: srai a5, a4, 31 +; XTENSA-NEXT: l32r a8, .LCPI19_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sext i32 %a to i64 + %2 = sext i32 %b to i64 + %3 = sdiv i64 %1, %2 + ret i64 %3 +} + +define i8 @sdiv8(i8 %a, i8 %b) nounwind { +; XTENSA-LABEL: sdiv8: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: slli a8, a2, 24 +; XTENSA-NEXT: srai a2, a8, 24 +; XTENSA-NEXT: slli a8, a3, 24 +; XTENSA-NEXT: srai a3, a8, 24 +; XTENSA-NEXT: l32r a8, .LCPI20_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i8 %a, %b + ret i8 %1 +} + +define i8 @sdiv8_constant(i8 %a) nounwind { +; XTENSA-LABEL: sdiv8_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: slli a8, a2, 
24 +; XTENSA-NEXT: srai a2, a8, 24 +; XTENSA-NEXT: movi a3, 5 +; XTENSA-NEXT: l32r a8, .LCPI21_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i8 %a, 5 + ret i8 %1 +} + +define i8 @sdiv8_pow2(i8 %a) nounwind { +; XTENSA-LABEL: sdiv8_pow2: +; XTENSA: slli a8, a2, 24 +; XTENSA-NEXT: srai a8, a8, 24 +; XTENSA-NEXT: srli a8, a8, 12 +; XTENSA-NEXT: movi a9, 7 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a2, a8 +; XTENSA-NEXT: slli a8, a8, 24 +; XTENSA-NEXT: srai a2, a8, 27 +; XTENSA-NEXT: ret + %1 = sdiv i8 %a, 8 + ret i8 %1 +} + +define i8 @sdiv8_constant_lhs(i8 %a) nounwind { +; XTENSA-LABEL: sdiv8_constant_lhs: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: slli a8, a2, 24 +; XTENSA-NEXT: srai a3, a8, 24 +; XTENSA-NEXT: movi a2, -10 +; XTENSA-NEXT: l32r a8, .LCPI23_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i8 -10, %a + ret i8 %1 +} + +define i16 @sdiv16(i16 %a, i16 %b) nounwind { +; XTENSA-LABEL: sdiv16: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: slli a8, a2, 16 +; XTENSA-NEXT: srai a2, a8, 16 +; XTENSA-NEXT: slli a8, a3, 16 +; XTENSA-NEXT: srai a3, a8, 16 +; XTENSA-NEXT: l32r a8, .LCPI24_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i16 %a, %b + ret i16 %1 +} + +define i16 @sdiv16_constant(i16 %a) nounwind { +; XTENSA-LABEL: sdiv16_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: slli a8, a2, 16 +; XTENSA-NEXT: srai a2, a8, 
16 +; XTENSA-NEXT: movi a3, 5 +; XTENSA-NEXT: l32r a8, .LCPI25_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i16 %a, 5 + ret i16 %1 +} + +define i16 @sdiv16_constant_lhs(i16 %a) nounwind { +; XTENSA-LABEL: sdiv16_constant_lhs: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: slli a8, a2, 16 +; XTENSA-NEXT: srai a3, a8, 16 +; XTENSA-NEXT: movi a2, -10 +; XTENSA-NEXT: l32r a8, .LCPI26_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sdiv i16 -10, %a + ret i16 %1 +} + +define i32 @sdiv_pow2(i32 %a) nounwind { +; XTENSA-LABEL: sdiv_pow2: +; XTENSA: srai a8, a2, 31 +; XTENSA-NEXT: movi a9, 29 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a8, a8 +; XTENSA-NEXT: add a8, a2, a8 +; XTENSA-NEXT: srai a2, a8, 3 +; XTENSA-NEXT: ret + %1 = sdiv i32 %a, 8 + ret i32 %1 +} + +define i32 @sdiv_pow2_2(i32 %a) nounwind { +; XTENSA-LABEL: sdiv_pow2_2: +; XTENSA: srai a8, a2, 31 +; XTENSA-NEXT: movi a9, 16 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a8, a8 +; XTENSA-NEXT: add a8, a2, a8 +; XTENSA-NEXT: srai a2, a8, 16 +; XTENSA-NEXT: ret + %1 = sdiv i32 %a, 65536 + ret i32 %1 +} + +define i16 @sdiv16_pow2(i16 %a) nounwind { +; XTENSA-LABEL: sdiv16_pow2: +; XTENSA: slli a8, a2, 16 +; XTENSA-NEXT: srai a8, a8, 16 +; XTENSA-NEXT: movi a9, 28 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a8, a8 +; XTENSA-NEXT: movi a9, 7 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a2, a8 +; XTENSA-NEXT: slli a8, a8, 16 +; XTENSA-NEXT: srai a2, a8, 19 +; XTENSA-NEXT: ret + %1 = sdiv i16 %a, 8 + ret i16 %1 +} diff --git a/llvm/test/CodeGen/Xtensa/mul.ll b/llvm/test/CodeGen/Xtensa/mul.ll new file mode 100644 index 0000000000000..0be2885458163 --- /dev/null +++ 
b/llvm/test/CodeGen/Xtensa/mul.ll @@ -0,0 +1,636 @@ +; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +define signext i32 @square(i32 %a) nounwind { +; XTENSA-LABEL: square: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI0_0 +; XTENSA-NEXT: or a3, a2, a2 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, %a + ret i32 %1 +} + +define signext i32 @mul(i32 %a, i32 %b) nounwind { +; XTENSA-LABEL: mul: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI1_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, %b + ret i32 %1 +} + +define signext i32 @mul_constant(i32 %a) nounwind { +; XTENSA-LABEL: mul_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a3, 5 +; XTENSA-NEXT: l32r a8, .LCPI2_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 5 + ret i32 %1 +} + +define i32 @mul_pow2(i32 %a) nounwind { +; XTENSA-LABEL: mul_pow2: +; XTENSA: slli a2, a2, 3 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 8 + ret i32 %1 +} + +define i64 @mul64(i64 %a, i64 %b) nounwind { +; XTENSA-LABEL: mul64: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a8, .LCPI4_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; 
XTENSA-NEXT: ret + %1 = mul i64 %a, %b + ret i64 %1 +} + +define i64 @mul64_constant(i64 %a) nounwind { +; XTENSA-LABEL: mul64_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a4, 5 +; XTENSA-NEXT: movi a5, 0 +; XTENSA-NEXT: l32r a8, .LCPI5_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i64 %a, 5 + ret i64 %1 +} + +define i32 @mulhs(i32 %a, i32 %b) nounwind { +; XTENSA-LABEL: mulhs: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a3, a3 +; XTENSA-NEXT: srai a3, a2, 31 +; XTENSA-NEXT: srai a5, a4, 31 +; XTENSA-NEXT: l32r a8, .LCPI6_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: or a2, a3, a3 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sext i32 %a to i64 + %2 = sext i32 %b to i64 + %3 = mul i64 %1, %2 + %4 = lshr i64 %3, 32 + %5 = trunc i64 %4 to i32 + ret i32 %5 +} + +define i32 @mulhs_positive_constant(i32 %a) nounwind { +; XTENSA-LABEL: mulhs_positive_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: srai a3, a2, 31 +; XTENSA-NEXT: movi a4, 5 +; XTENSA-NEXT: movi a5, 0 +; XTENSA-NEXT: l32r a8, .LCPI7_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: or a2, a3, a3 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sext i32 %a to i64 + %2 = mul i64 %1, 5 + %3 = lshr i64 %2, 32 + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +define i32 @mulhs_negative_constant(i32 %a) nounwind { +; XTENSA-LABEL: mulhs_negative_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, 
a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: srai a3, a2, 31 +; XTENSA-NEXT: movi a4, -5 +; XTENSA-NEXT: movi a5, -1 +; XTENSA-NEXT: l32r a8, .LCPI8_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: or a2, a3, a3 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = sext i32 %a to i64 + %2 = mul i64 %1, -5 + %3 = lshr i64 %2, 32 + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +define zeroext i32 @mulhu(i32 zeroext %a, i32 zeroext %b) nounwind { +; XTENSA-LABEL: mulhu: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a3, a3 +; XTENSA-NEXT: movi a3, 0 +; XTENSA-NEXT: l32r a8, .LCPI9_0 +; XTENSA-NEXT: or a5, a3, a3 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: or a2, a3, a3 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = zext i32 %a to i64 + %2 = zext i32 %b to i64 + %3 = mul i64 %1, %2 + %4 = lshr i64 %3, 32 + %5 = trunc i64 %4 to i32 + ret i32 %5 +} + +define i32 @mulhsu(i32 %a, i32 %b) nounwind { +; XTENSA-LABEL: mulhsu: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a3, a3 +; XTENSA-NEXT: srai a5, a4, 31 +; XTENSA-NEXT: movi a3, 0 +; XTENSA-NEXT: l32r a8, .LCPI10_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: or a2, a3, a3 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = zext i32 %a to i64 + %2 = sext i32 %b to i64 + %3 = mul i64 %1, %2 + %4 = lshr i64 %3, 32 + %5 = trunc i64 %4 to i32 + ret i32 %5 +} + +define i32 @mulhu_constant(i32 %a) nounwind { +; XTENSA-LABEL: mulhu_constant: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a4, 5 +; XTENSA-NEXT: movi 
a3, 0 +; XTENSA-NEXT: l32r a8, .LCPI11_0 +; XTENSA-NEXT: or a5, a3, a3 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: or a2, a3, a3 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = zext i32 %a to i64 + %2 = mul i64 %1, 5 + %3 = lshr i64 %2, 32 + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +define i32 @muli32_p65(i32 %a) nounwind { +; XTENSA-LABEL: muli32_p65: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a3, 65 +; XTENSA-NEXT: l32r a8, .LCPI12_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 65 + ret i32 %1 +} + +define i32 @muli32_p63(i32 %a) nounwind { +; XTENSA-LABEL: muli32_p63: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a3, 63 +; XTENSA-NEXT: l32r a8, .LCPI13_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 63 + ret i32 %1 +} + +define i64 @muli64_p65(i64 %a) nounwind { +; XTENSA-LABEL: muli64_p65: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a4, 65 +; XTENSA-NEXT: movi a5, 0 +; XTENSA-NEXT: l32r a8, .LCPI14_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i64 %a, 65 + ret i64 %1 +} + +define i64 @muli64_p63(i64 %a) nounwind { +; XTENSA-LABEL: muli64_p63: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a4, 63 +; XTENSA-NEXT: movi a5, 0 +; 
XTENSA-NEXT: l32r a8, .LCPI15_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i64 %a, 63 + ret i64 %1 +} + +define i32 @muli32_m63(i32 %a) nounwind { +; XTENSA-LABEL: muli32_m63: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a3, -63 +; XTENSA-NEXT: l32r a8, .LCPI16_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, -63 + ret i32 %1 +} + +define i32 @muli32_m65(i32 %a) nounwind { +; XTENSA-LABEL: muli32_m65: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a3, -65 +; XTENSA-NEXT: l32r a8, .LCPI17_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, -65 + ret i32 %1 +} + +define i64 @muli64_m63(i64 %a) nounwind { +; XTENSA-LABEL: muli64_m63: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a4, -63 +; XTENSA-NEXT: movi a5, -1 +; XTENSA-NEXT: l32r a8, .LCPI18_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i64 %a, -63 + ret i64 %1 +} + +define i64 @muli64_m65(i64 %a) nounwind { +; XTENSA-LABEL: muli64_m65: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a4, -65 +; XTENSA-NEXT: movi a5, -1 +; XTENSA-NEXT: l32r a8, .LCPI19_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 
+; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i64 %a, -65 + ret i64 %1 +} + +define i32 @muli32_p384(i32 %a) nounwind { +; XTENSA-LABEL: muli32_p384: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: movi a3, 384 +; XTENSA-NEXT: l32r a8, .LCPI20_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 384 + ret i32 %1 +} + +define i32 @muli32_p12288(i32 %a) nounwind { +; XTENSA-LABEL: muli32_p12288: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a3, .LCPI21_0 +; XTENSA-NEXT: l32r a8, .LCPI21_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 12288 + ret i32 %1 +} + +define i32 @muli32_p4352(i32 %a) nounwind { +; XTENSA-LABEL: muli32_p4352: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a3, .LCPI22_0 +; XTENSA-NEXT: l32r a8, .LCPI22_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 4352 + ret i32 %1 +} + +define i32 @muli32_p3840(i32 %a) nounwind { +; XTENSA-LABEL: muli32_p3840: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a3, .LCPI23_0 +; XTENSA-NEXT: l32r a8, .LCPI23_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 3840 + ret i32 %1 +} + +define i32 @muli32_m3840(i32 %a) nounwind { +; XTENSA-LABEL: 
muli32_m3840: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a3, .LCPI24_0 +; XTENSA-NEXT: l32r a8, .LCPI24_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, -3840 + ret i32 %1 +} + +define i32 @muli32_m4352(i32 %a) nounwind { +; XTENSA-LABEL: muli32_m4352: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a3, .LCPI25_0 +; XTENSA-NEXT: l32r a8, .LCPI25_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i32 %a, -4352 + ret i32 %1 +} + +define i64 @muli64_p4352(i64 %a) nounwind { +; XTENSA-LABEL: muli64_p4352: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a4, .LCPI26_0 +; XTENSA-NEXT: movi a5, 0 +; XTENSA-NEXT: l32r a8, .LCPI26_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i64 %a, 4352 + ret i64 %1 +} + +define i64 @muli64_p3840(i64 %a) nounwind { +; XTENSA-LABEL: muli64_p3840: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a4, .LCPI27_0 +; XTENSA-NEXT: movi a5, 0 +; XTENSA-NEXT: l32r a8, .LCPI27_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i64 %a, 3840 + ret i64 %1 +} + +define i64 @muli64_m4352(i64 %a) nounwind { +; XTENSA-LABEL: muli64_m4352: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, 
a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a4, .LCPI28_0 +; XTENSA-NEXT: movi a5, -1 +; XTENSA-NEXT: l32r a8, .LCPI28_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i64 %a, -4352 + ret i64 %1 +} + +define i64 @muli64_m3840(i64 %a) nounwind { +; XTENSA-LABEL: muli64_m3840: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: l32r a4, .LCPI29_0 +; XTENSA-NEXT: movi a5, -1 +; XTENSA-NEXT: l32r a8, .LCPI29_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i64 %a, -3840 + ret i64 %1 +} + +define i128 @muli128_m3840(i128 %a) nounwind { +; XTENSA-LABEL: muli128_m3840: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: movi a7, -1 +; XTENSA-NEXT: s32i a7, a1, 4 +; XTENSA-NEXT: s32i a7, a1, 0 +; XTENSA-NEXT: l32r a6, .LCPI30_0 +; XTENSA-NEXT: l32r a8, .LCPI30_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i128 %a, -3840 + ret i128 %1 +} + +define i128 @muli128_m63(i128 %a) nounwind { +; XTENSA-LABEL: muli128_m63: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: movi a7, -1 +; XTENSA-NEXT: s32i a7, a1, 4 +; XTENSA-NEXT: s32i a7, a1, 0 +; XTENSA-NEXT: movi a6, -63 +; XTENSA-NEXT: l32r a8, .LCPI31_0 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = mul i128 %a, -63 + ret i128 %1 +} + +define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { +; XTENSA-LABEL: 
mulhsu_i64: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: or a7, a5, a5 +; XTENSA-NEXT: or a6, a4, a4 +; XTENSA-NEXT: srai a8, a7, 31 +; XTENSA-NEXT: s32i a8, a1, 4 +; XTENSA-NEXT: s32i a8, a1, 0 +; XTENSA-NEXT: movi a4, 0 +; XTENSA-NEXT: l32r a8, .LCPI32_0 +; XTENSA-NEXT: or a5, a4, a4 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: or a2, a4, a4 +; XTENSA-NEXT: or a3, a5, a5 +; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %1 = zext i64 %a to i128 + %2 = sext i64 %b to i128 + %3 = mul i128 %1, %2 + %4 = lshr i128 %3, 64 + %5 = trunc i128 %4 to i64 + ret i64 %5 +} + +define i8 @muladd_demand(i8 %x, i8 %y) nounwind { +; XTENSA-LABEL: muladd_demand: +; XTENSA: slli a8, a2, 1 +; XTENSA-NEXT: sub a8, a3, a8 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: ret + %m = mul i8 %x, 14 + %a = add i8 %y, %m + %r = and i8 %a, 15 + ret i8 %r +} + +define i8 @mulsub_demand(i8 %x, i8 %y) nounwind { +; XTENSA-LABEL: mulsub_demand: +; XTENSA: addx2 a8, a2, a3 +; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: ret + %m = mul i8 %x, 14 + %a = sub i8 %y, %m + %r = and i8 %a, 15 + ret i8 %r +} + +define i8 @muladd_demand_2(i8 %x, i8 %y) nounwind { +; XTENSA-LABEL: muladd_demand_2: +; XTENSA: slli a8, a2, 1 +; XTENSA-NEXT: sub a8, a3, a8 +; XTENSA-NEXT: movi a9, -16 +; XTENSA-NEXT: or a2, a8, a9 +; XTENSA-NEXT: ret + %m = mul i8 %x, 14 + %a = add i8 %y, %m + %r = or i8 %a, 240 + ret i8 %r +} + +define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind { +; XTENSA-LABEL: mulsub_demand_2: +; XTENSA: addx2 a8, a2, a3 +; XTENSA-NEXT: movi a9, -16 +; XTENSA-NEXT: or a2, a8, a9 +; XTENSA-NEXT: ret + %m = mul i8 %x, 14 + %a = sub i8 %y, %m + %r = or i8 %a, 240 + ret i8 %r +} diff --git a/llvm/test/CodeGen/Xtensa/rotl-rotr.ll b/llvm/test/CodeGen/Xtensa/rotl-rotr.ll new file mode 100644 index 
0000000000000..1dc52fbc94b41 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/rotl-rotr.ll @@ -0,0 +1,500 @@ +; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +define i32 @rotl_32(i32 %x, i32 %y) nounwind { +; XTENSA-LABEL: rotl_32: +; XTENSA: ssl a3 +; XTENSA-NEXT: sll a8, a2 +; XTENSA-NEXT: movi a9, 32 +; XTENSA-NEXT: sub a9, a9, a3 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a9, a2 +; XTENSA-NEXT: or a2, a8, a9 +; XTENSA-NEXT: ret + %z = sub i32 32, %y + %b = shl i32 %x, %y + %c = lshr i32 %x, %z + %d = or i32 %b, %c + ret i32 %d +} + +define i32 @rotr_32(i32 %x, i32 %y) nounwind { +; XTENSA-LABEL: rotr_32: +; XTENSA: ssr a3 +; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: movi a9, 32 +; XTENSA-NEXT: sub a9, a9, a3 +; XTENSA-NEXT: ssl a9 +; XTENSA-NEXT: sll a9, a2 +; XTENSA-NEXT: or a2, a8, a9 +; XTENSA-NEXT: ret + %z = sub i32 32, %y + %b = lshr i32 %x, %y + %c = shl i32 %x, %z + %d = or i32 %b, %c + ret i32 %d +} + +define i64 @rotl_64(i64 %x, i64 %y) nounwind { +; XTENSA-LABEL: rotl_64: +; XTENSA: movi a8, 64 +; XTENSA-NEXT: sub a8, a8, a4 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: src a11, a3, a2 +; XTENSA-NEXT: movi a9, 32 +; XTENSA-NEXT: sub a9, a9, a4 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a7, a3 +; XTENSA-NEXT: movi a10, 0 +; XTENSA-NEXT: blt a9, a10, .LBB2_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a11, a7, a7 +; XTENSA-NEXT: .LBB2_2: +; XTENSA-NEXT: ssl a4 +; XTENSA-NEXT: sll a7, a2 +; XTENSA-NEXT: addi a5, a4, -32 +; XTENSA-NEXT: blt a5, a10, .LBB2_4 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a7, a10, a10 +; XTENSA-NEXT: .LBB2_4: +; XTENSA-NEXT: ssl a4 +; XTENSA-NEXT: src a6, a3, a2 +; XTENSA-NEXT: ssl a5 +; XTENSA-NEXT: sll a4, a2 +; XTENSA-NEXT: blt a5, a10, .LBB2_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a6, a4, a4 +; XTENSA-NEXT: .LBB2_6: +; XTENSA-NEXT: or a2, a7, a11 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a8, a3 +; XTENSA-NEXT: blt a9, a10, .LBB2_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or 
a8, a10, a10 +; XTENSA-NEXT: .LBB2_8: +; XTENSA-NEXT: or a3, a6, a8 +; XTENSA-NEXT: ret + %z = sub i64 64, %y + %b = shl i64 %x, %y + %c = lshr i64 %x, %z + %d = or i64 %b, %c + ret i64 %d +} + +define i64 @rotr_64(i64 %x, i64 %y) nounwind { +; XTENSA-LABEL: rotr_64: +; XTENSA: ssr a4 +; XTENSA-NEXT: src a10, a3, a2 +; XTENSA-NEXT: addi a8, a4, -32 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a11, a3 +; XTENSA-NEXT: movi a9, 0 +; XTENSA-NEXT: blt a8, a9, .LBB3_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a10, a11, a11 +; XTENSA-NEXT: .LBB3_2: +; XTENSA-NEXT: movi a11, 32 +; XTENSA-NEXT: sub a7, a11, a4 +; XTENSA-NEXT: movi a11, 64 +; XTENSA-NEXT: sub a11, a11, a4 +; XTENSA-NEXT: ssl a11 +; XTENSA-NEXT: sll a6, a2 +; XTENSA-NEXT: blt a7, a9, .LBB3_4 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a6, a9, a9 +; XTENSA-NEXT: .LBB3_4: +; XTENSA-NEXT: ssl a11 +; XTENSA-NEXT: src a11, a3, a2 +; XTENSA-NEXT: ssl a7 +; XTENSA-NEXT: sll a5, a2 +; XTENSA-NEXT: blt a7, a9, .LBB3_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a11, a5, a5 +; XTENSA-NEXT: .LBB3_6: +; XTENSA-NEXT: or a2, a10, a6 +; XTENSA-NEXT: ssr a4 +; XTENSA-NEXT: srl a10, a3 +; XTENSA-NEXT: blt a8, a9, .LBB3_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a10, a9, a9 +; XTENSA-NEXT: .LBB3_8: +; XTENSA-NEXT: or a3, a10, a11 +; XTENSA-NEXT: ret + %z = sub i64 64, %y + %b = lshr i64 %x, %y + %c = shl i64 %x, %z + %d = or i64 %b, %c + ret i64 %d +} + +define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind { +; XTENSA-LABEL: rotl_32_mask: +; XTENSA: ssl a3 +; XTENSA-NEXT: sll a8, a2 +; XTENSA-NEXT: neg a9, a3 +; XTENSA-NEXT: movi a10, 31 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a9, a2 +; XTENSA-NEXT: or a2, a8, a9 +; XTENSA-NEXT: ret + %z = sub i32 0, %y + %and = and i32 %z, 31 + %b = shl i32 %x, %y + %c = lshr i32 %x, %and + %d = or i32 %b, %c + ret i32 %d +} + +define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind { +; XTENSA-LABEL: rotl_32_mask_and_63_and_31: +; XTENSA: movi a8, 
63 +; XTENSA-NEXT: and a8, a3, a8 +; XTENSA-NEXT: ssl a8 +; XTENSA-NEXT: sll a8, a2 +; XTENSA-NEXT: neg a9, a3 +; XTENSA-NEXT: movi a10, 31 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a9, a2 +; XTENSA-NEXT: or a2, a8, a9 +; XTENSA-NEXT: ret + %a = and i32 %y, 63 + %b = shl i32 %x, %a + %c = sub i32 0, %y + %d = and i32 %c, 31 + %e = lshr i32 %x, %d + %f = or i32 %b, %e + ret i32 %f +} + +define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind { +; XTENSA-LABEL: rotr_32_mask: +; XTENSA: ssr a3 +; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: neg a9, a3 +; XTENSA-NEXT: movi a10, 31 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: ssl a9 +; XTENSA-NEXT: sll a9, a2 +; XTENSA-NEXT: or a2, a8, a9 +; XTENSA-NEXT: ret + %z = sub i32 0, %y + %and = and i32 %z, 31 + %b = lshr i32 %x, %y + %c = shl i32 %x, %and + %d = or i32 %b, %c + ret i32 %d +} + +define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind { +; XTENSA-LABEL: rotr_32_mask_and_63_and_31: +; XTENSA: movi a8, 63 +; XTENSA-NEXT: and a8, a3, a8 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: neg a9, a3 +; XTENSA-NEXT: movi a10, 31 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: ssl a9 +; XTENSA-NEXT: sll a9, a2 +; XTENSA-NEXT: or a2, a8, a9 +; XTENSA-NEXT: ret + %a = and i32 %y, 63 + %b = lshr i32 %x, %a + %c = sub i32 0, %y + %d = and i32 %c, 31 + %e = shl i32 %x, %d + %f = or i32 %b, %e + ret i32 %f +} + +define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { +; XTENSA-LABEL: rotl_64_mask: +; XTENSA: ssl a4 +; XTENSA-NEXT: src a10, a3, a2 +; XTENSA-NEXT: addi a8, a4, -32 +; XTENSA-NEXT: ssl a8 +; XTENSA-NEXT: sll a11, a2 +; XTENSA-NEXT: movi a9, 0 +; XTENSA-NEXT: blt a8, a9, .LBB8_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a10, a11, a11 +; XTENSA-NEXT: .LBB8_2: +; XTENSA-NEXT: neg a11, a4 +; XTENSA-NEXT: movi a7, 63 +; XTENSA-NEXT: and a7, a11, a7 +; XTENSA-NEXT: ssr a7 +; XTENSA-NEXT: srl a11, a3 +; XTENSA-NEXT: addi a6, a7, -32 +; XTENSA-NEXT: blt a6, a9, .LBB8_4 +; 
XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a11, a9, a9 +; XTENSA-NEXT: .LBB8_4: +; XTENSA-NEXT: ssr a7 +; XTENSA-NEXT: src a7, a3, a2 +; XTENSA-NEXT: ssr a6 +; XTENSA-NEXT: srl a5, a3 +; XTENSA-NEXT: blt a6, a9, .LBB8_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a7, a5, a5 +; XTENSA-NEXT: .LBB8_6: +; XTENSA-NEXT: or a3, a10, a11 +; XTENSA-NEXT: ssl a4 +; XTENSA-NEXT: sll a10, a2 +; XTENSA-NEXT: blt a8, a9, .LBB8_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a10, a9, a9 +; XTENSA-NEXT: .LBB8_8: +; XTENSA-NEXT: or a2, a10, a7 +; XTENSA-NEXT: ret + %z = sub i64 0, %y + %and = and i64 %z, 63 + %b = shl i64 %x, %y + %c = lshr i64 %x, %and + %d = or i64 %b, %c + ret i64 %d +} + +define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { +; XTENSA-LABEL: rotl_64_mask_and_127_and_63: +; XTENSA: movi a8, 127 +; XTENSA-NEXT: and a8, a4, a8 +; XTENSA-NEXT: ssl a8 +; XTENSA-NEXT: src a11, a3, a2 +; XTENSA-NEXT: addi a9, a8, -32 +; XTENSA-NEXT: ssl a9 +; XTENSA-NEXT: sll a7, a2 +; XTENSA-NEXT: movi a10, 0 +; XTENSA-NEXT: blt a9, a10, .LBB9_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a11, a7, a7 +; XTENSA-NEXT: .LBB9_2: +; XTENSA-NEXT: neg a7, a4 +; XTENSA-NEXT: movi a6, 63 +; XTENSA-NEXT: and a6, a7, a6 +; XTENSA-NEXT: ssr a6 +; XTENSA-NEXT: srl a7, a3 +; XTENSA-NEXT: addi a5, a6, -32 +; XTENSA-NEXT: blt a5, a10, .LBB9_4 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a7, a10, a10 +; XTENSA-NEXT: .LBB9_4: +; XTENSA-NEXT: ssr a6 +; XTENSA-NEXT: src a6, a3, a2 +; XTENSA-NEXT: ssr a5 +; XTENSA-NEXT: srl a4, a3 +; XTENSA-NEXT: blt a5, a10, .LBB9_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a6, a4, a4 +; XTENSA-NEXT: .LBB9_6: +; XTENSA-NEXT: or a3, a11, a7 +; XTENSA-NEXT: ssl a8 +; XTENSA-NEXT: sll a8, a2 +; XTENSA-NEXT: blt a9, a10, .LBB9_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a8, a10, a10 +; XTENSA-NEXT: .LBB9_8: +; XTENSA-NEXT: or a2, a8, a6 +; XTENSA-NEXT: ret + %a = and i64 %y, 127 + %b = shl i64 %x, %a + %c = sub i64 0, %y + %d = and i64 %c, 63 + %e = lshr i64 %x, 
%d + %f = or i64 %b, %e + ret i64 %f +} + +define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { +; XTENSA-LABEL: rotr_64_mask: +; XTENSA: ssr a4 +; XTENSA-NEXT: src a10, a3, a2 +; XTENSA-NEXT: addi a8, a4, -32 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a11, a3 +; XTENSA-NEXT: movi a9, 0 +; XTENSA-NEXT: blt a8, a9, .LBB10_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a10, a11, a11 +; XTENSA-NEXT: .LBB10_2: +; XTENSA-NEXT: neg a11, a4 +; XTENSA-NEXT: movi a7, 63 +; XTENSA-NEXT: and a7, a11, a7 +; XTENSA-NEXT: ssl a7 +; XTENSA-NEXT: sll a11, a2 +; XTENSA-NEXT: addi a6, a7, -32 +; XTENSA-NEXT: blt a6, a9, .LBB10_4 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a11, a9, a9 +; XTENSA-NEXT: .LBB10_4: +; XTENSA-NEXT: ssl a7 +; XTENSA-NEXT: src a7, a3, a2 +; XTENSA-NEXT: ssl a6 +; XTENSA-NEXT: sll a5, a2 +; XTENSA-NEXT: blt a6, a9, .LBB10_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a7, a5, a5 +; XTENSA-NEXT: .LBB10_6: +; XTENSA-NEXT: or a2, a10, a11 +; XTENSA-NEXT: ssr a4 +; XTENSA-NEXT: srl a10, a3 +; XTENSA-NEXT: blt a8, a9, .LBB10_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a10, a9, a9 +; XTENSA-NEXT: .LBB10_8: +; XTENSA-NEXT: or a3, a10, a7 +; XTENSA-NEXT: ret + %z = sub i64 0, %y + %and = and i64 %z, 63 + %b = lshr i64 %x, %y + %c = shl i64 %x, %and + %d = or i64 %b, %c + ret i64 %d +} + +define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { +; XTENSA-LABEL: rotr_64_mask_and_127_and_63: +; XTENSA: movi a8, 127 +; XTENSA-NEXT: and a8, a4, a8 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: src a11, a3, a2 +; XTENSA-NEXT: addi a9, a8, -32 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a7, a3 +; XTENSA-NEXT: movi a10, 0 +; XTENSA-NEXT: blt a9, a10, .LBB11_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a11, a7, a7 +; XTENSA-NEXT: .LBB11_2: +; XTENSA-NEXT: neg a7, a4 +; XTENSA-NEXT: movi a6, 63 +; XTENSA-NEXT: and a6, a7, a6 +; XTENSA-NEXT: ssl a6 +; XTENSA-NEXT: sll a7, a2 +; XTENSA-NEXT: addi a5, a6, -32 +; XTENSA-NEXT: blt a5, a10, .LBB11_4 +; XTENSA-NEXT: # %bb.3: +; 
XTENSA-NEXT: or a7, a10, a10 +; XTENSA-NEXT: .LBB11_4: +; XTENSA-NEXT: ssl a6 +; XTENSA-NEXT: src a6, a3, a2 +; XTENSA-NEXT: ssl a5 +; XTENSA-NEXT: sll a4, a2 +; XTENSA-NEXT: blt a5, a10, .LBB11_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a6, a4, a4 +; XTENSA-NEXT: .LBB11_6: +; XTENSA-NEXT: or a2, a11, a7 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a8, a3 +; XTENSA-NEXT: blt a9, a10, .LBB11_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a8, a10, a10 +; XTENSA-NEXT: .LBB11_8: +; XTENSA-NEXT: or a3, a8, a6 +; XTENSA-NEXT: ret + %a = and i64 %y, 127 + %b = lshr i64 %x, %a + %c = sub i64 0, %y + %d = and i64 %c, 63 + %e = shl i64 %x, %d + %f = or i64 %b, %e + ret i64 %f +} + +define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { +; XTENSA-LABEL: rotl_32_mask_shared: +; XTENSA: movi a8, 31 +; XTENSA-NEXT: and a9, a4, a8 +; XTENSA-NEXT: ssl a9 +; XTENSA-NEXT: sll a10, a2 +; XTENSA-NEXT: neg a11, a4 +; XTENSA-NEXT: and a8, a11, a8 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: or a8, a10, a8 +; XTENSA-NEXT: ssl a9 +; XTENSA-NEXT: sll a9, a3 +; XTENSA-NEXT: add a2, a8, a9 +; XTENSA-NEXT: ret + %maskedamt = and i32 %amt, 31 + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt) + %2 = shl i32 %b, %maskedamt + %3 = add i32 %1, %2 + ret i32 %3 +} +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { +; XTENSA-LABEL: rotr_32_mask_shared: +; XTENSA: movi a8, 31 +; XTENSA-NEXT: and a9, a4, a8 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a10, a2 +; XTENSA-NEXT: neg a11, a4 +; XTENSA-NEXT: and a8, a11, a8 +; XTENSA-NEXT: ssl a8 +; XTENSA-NEXT: sll a8, a2 +; XTENSA-NEXT: or a8, a10, a8 +; XTENSA-NEXT: ssl a9 +; XTENSA-NEXT: sll a9, a3 +; XTENSA-NEXT: add a2, a8, a9 +; XTENSA-NEXT: ret + %maskedamt = and i32 %amt, 31 + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt) + %2 = shl i32 %b, %maskedamt + 
%3 = add i32 %1, %2 + ret i32 %3 +} +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { +; XTENSA-LABEL: rotl_32_mask_multiple: +; XTENSA: movi a8, 31 +; XTENSA-NEXT: and a9, a4, a8 +; XTENSA-NEXT: ssl a9 +; XTENSA-NEXT: sll a10, a3 +; XTENSA-NEXT: neg a11, a4 +; XTENSA-NEXT: and a8, a11, a8 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a11, a3 +; XTENSA-NEXT: or a10, a10, a11 +; XTENSA-NEXT: ssl a9 +; XTENSA-NEXT: sll a9, a2 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: add a2, a8, a10 +; XTENSA-NEXT: ret + %maskedamt = and i32 %amt, 31 + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt) + %2 = tail call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 %maskedamt) + %3 = add i32 %1, %2 + ret i32 %3 +} + +define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { +; XTENSA-LABEL: rotr_32_mask_multiple: +; XTENSA: movi a8, 31 +; XTENSA-NEXT: and a9, a4, a8 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a10, a3 +; XTENSA-NEXT: neg a11, a4 +; XTENSA-NEXT: and a8, a11, a8 +; XTENSA-NEXT: ssl a8 +; XTENSA-NEXT: sll a11, a3 +; XTENSA-NEXT: or a10, a10, a11 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a9, a2 +; XTENSA-NEXT: ssl a8 +; XTENSA-NEXT: sll a8, a2 +; XTENSA-NEXT: or a8, a9, a8 +; XTENSA-NEXT: add a2, a8, a10 +; XTENSA-NEXT: ret + %maskedamt = and i32 %amt, 31 + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt) + %2 = tail call i32 @llvm.fshr.i32(i32 %b, i32 %b, i32 %maskedamt) + %3 = add i32 %1, %2 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/Xtensa/shift.ll b/llvm/test/CodeGen/Xtensa/shift.ll new file mode 100644 index 0000000000000..acca8551fa621 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/shift.ll @@ -0,0 +1,72 @@ +; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +define i64 @lshl_64(i64 %x, i64 %y) nounwind { +; 
CHECK-LABEL: lshl_64: +; CHECK: ssl a4 +; CHECK-NEXT: src a3, a3, a2 +; CHECK-NEXT: addi a8, a4, -32 +; CHECK-NEXT: ssl a8 +; CHECK-NEXT: sll a10, a2 +; CHECK-NEXT: movi a9, 0 +; CHECK-NEXT: blt a8, a9, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: or a3, a10, a10 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: ssl a4 +; CHECK-NEXT: sll a2, a2 +; CHECK-NEXT: blt a8, a9, .LBB0_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: or a2, a9, a9 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: ret + %c = shl i64 %x, %y + ret i64 %c +} + +define i64 @lshr_64(i64 %x, i64 %y) nounwind { +; CHECK-LABEL: lshr_64: +; CHECK: ssr a4 +; CHECK-NEXT: src a2, a3, a2 +; CHECK-NEXT: addi a8, a4, -32 +; CHECK-NEXT: ssr a8 +; CHECK-NEXT: srl a10, a3 +; CHECK-NEXT: movi a9, 0 +; CHECK-NEXT: blt a8, a9, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: or a2, a10, a10 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: ssr a4 +; CHECK-NEXT: srl a3, a3 +; CHECK-NEXT: blt a8, a9, .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: or a3, a9, a9 +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: ret + %c = lshr i64 %x, %y + ret i64 %c +} + +define i64 @ashr_64(i64 %x, i64 %y) nounwind { +; CHECK-LABEL: ashr_64: +; CHECK: ssr a4 +; CHECK-NEXT: src a2, a3, a2 +; CHECK-NEXT: addi a9, a4, -32 +; CHECK-NEXT: ssr a9 +; CHECK-NEXT: sra a8, a3 +; CHECK-NEXT: movi a10, 0 +; CHECK-NEXT: blt a9, a10, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: or a2, a8, a8 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: ssr a4 +; CHECK-NEXT: sra a8, a3 +; CHECK-NEXT: blt a9, a10, .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: srai a8, a3, 31 +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: or a3, a8, a8 +; CHECK-NEXT: ret + %c = ashr i64 %x, %y + ret i64 %c +} From 1cb9e7c68399ccc626d3df282bb41ceec77b690e Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Fri, 26 Jul 2024 18:03:37 +0300 Subject: [PATCH 02/11] [Xtensa] Minor fixes in constant pool lowering. 
--- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 22 ++++----- llvm/lib/Target/Xtensa/XtensaISelLowering.h | 2 +- .../CodeGen/Xtensa/constantpool-aggregate.ll | 45 +++++++++++++++++++ 3 files changed, 57 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 8c30dbbad821e..2253d18c7ff81 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -690,25 +690,25 @@ SDValue XtensaTargetLowering::getAddrPCRel(SDValue Op, return DAG.getNode(XtensaISD::PCREL_WRAPPER, DL, Ty, Op); } -SDValue XtensaTargetLowering::LowerConstantPool(ConstantPoolSDNode *CP, +SDValue XtensaTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - auto C = const_cast(CP->getConstVal()); - auto T = const_cast(CP->getType()); + EVT PtrVT = Op.getValueType(); + ConstantPoolSDNode *CP = cast(Op); + auto C = const_cast(CP->getConstVal()); + auto T = CP->getType(); SDValue Result; // Do not use constant pool for aggregate or vector constant types, // in such cases create global variable, for example to store tabel // when we lower CTTZ operation. 
- if (T->isAggregateType() || T->isVectorTy()) { - auto AFI = DAG.getMachineFunction().getInfo(); - auto M = const_cast( - DAG.getMachineFunction().getFunction().getParent()); + if (T->isAggregateType()) { + MachineFunction &MF = DAG.getMachineFunction(); + auto AFI = MF.getInfo(); + auto M = const_cast(MF.getFunction().getParent()); auto GV = new GlobalVariable( *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C, Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + - Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + - Twine(AFI->createLabelUId())); + Twine(MF.getFunctionNumber()) + "_" + Twine(AFI->createLabelUId())); Result = DAG.getTargetConstantPool(GV, PtrVT, Align(4)); } else { if (!CP->isMachineConstantPoolEntry()) { @@ -898,7 +898,7 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::ConstantPool: - return LowerConstantPool(cast(Op), DAG); + return LowerConstantPool(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SELECT_CC: diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index b4c4929922cbf..8e18b50f211da 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -109,7 +109,7 @@ class XtensaTargetLowering : public TargetLowering { SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll b/llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll new file mode 100644 index 0000000000000..3ace3a6d604b3 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +; Check that we 
place const array (CTTZ lookup table) in global variable, +; instead of constant pool and place label to this table in constant pool. + +; CHECK: .literal_position +; CHECK-NEXT: .literal .LCPI0_0, 125613361 +; CHECK-NEXT: .literal .LCPI0_1, __mulsi3 +; CHECK-NEXT: .literal .LCPI0_2, .LCP0_0 +; CHECK-NEXT: .global test_cttz_i32 + +define i32 @test_cttz_i32(i32 %a) nounwind { +; XTENSA-LABEL: test_cttz_i32: +; XTENSA: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: or a8, a2, a2 +; XTENSA-NEXT: movi a2, 32 +; XTENSA-NEXT: beqz a8, .LBB0_2 +; XTENSA-NEXT: j .LBB2_1 +; XTENSA-NEXT: .LBB2_1: # %cond.false +; XTENSA-NEXT: neg a9, a8 +; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: l32r a3, .LCPI0_0 +; XTENSA-NEXT: l32r a8, .LCPI0_1 +; XTENSA-NEXT: callx0 a8 +; XTENSA-NEXT: movi a8, 27 +; XTENSA-NEXT: ssr a8 +; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: l32r a9, .LCPI0_2 +; XTENSA-NEXT: add a8, a9, a8 +; XTENSA-NEXT: l8ui a2, a8, 0 +; XTENSA-NEXT: .LBB2_2: # %cond.end +; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret + %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %tmp +} + +; CHECK: .LCP0_0: +; CHECK-NEXT: .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t" +; CHECK-NEXT: .size .LCP0_0, 32 From 49757d67ac0f61e612e0c6ac10aba4bb63b79c23 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Fri, 26 Jul 2024 20:49:19 +0300 Subject: [PATCH 03/11] [Xtensa] Minor code formatting. 
--- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 2253d18c7ff81..229bbc25cb28d 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -694,7 +694,7 @@ SDValue XtensaTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); - auto C = const_cast(CP->getConstVal()); + auto C = const_cast(CP->getConstVal()); auto T = CP->getType(); SDValue Result; From ac62be1a74983beee719ee0f4125c2d1147b6262 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Thu, 1 Aug 2024 12:52:50 +0300 Subject: [PATCH 04/11] [Xtensa] Transform multiply by constant. Implement decomposeMulByConstant function and remove lowering Mul operation. Minor fixes in lowering constant pool. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 53 +++----- llvm/lib/Target/Xtensa/XtensaISelLowering.h | 9 +- llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll | 69 ++++------- llvm/test/CodeGen/Xtensa/mul.ll | 24 ++++ llvm/test/CodeGen/Xtensa/shift.ll | 115 ++++++++++++++++-- 5 files changed, 180 insertions(+), 90 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 229bbc25cb28d..b87c081a62c96 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -100,7 +100,7 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setCondCodeAction(ISD::SETUGT, MVT::i32, Expand); setCondCodeAction(ISD::SETULE, MVT::i32, Expand); - setOperationAction(ISD::MUL, MVT::i32, Custom); + setOperationAction(ISD::MUL, MVT::i32, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); @@ -655,10
+655,12 @@ SDValue XtensaTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); const DataLayout &TD = DAG.getDataLayout(); EVT PtrVT = Table.getValueType(); - unsigned EntrySize = MJTI->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, DL, Index.getValueType(), Index, - DAG.getConstant(EntrySize, DL, Index.getValueType())); + assert((MJTI->getEntrySize(TD) == 4) && "Unsupported jump-table entry size"); + + Index = DAG.getNode(ISD::SHL, DL, Index.getValueType(), Index, + DAG.getConstant(2, DL, Index.getValueType())); + SDValue Addr = DAG.getNode(ISD::ADD, DL, Index.getValueType(), Index, Table); SDValue LD = DAG.getLoad(PtrVT, DL, Chain, Addr, @@ -852,36 +854,23 @@ SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op, return DAG.getMergeValues(Ops, DL); } -SDValue XtensaTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op->getValueType(0); - SDLoc DL(Op); +bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const { + if (!VT.isScalarInteger()) + return false; - if (VT != MVT::i32) - return SDValue(); - - ConstantSDNode *C = dyn_cast(Op->getOperand(1)); - if (!C) - return SDValue(); - - int64_t MulAmt = C->getSExtValue(); - unsigned ShiftAmt = 0; - - switch (MulAmt) { - case 2: - ShiftAmt = 1; - break; - case 4: - ShiftAmt = 2; - break; - case 8: - ShiftAmt = 3; - break; - default: - return SDValue(); + // Omit if data size exceeds. + if (VT.getSizeInBits() > 32) + return false; + + if (auto *ConstNode = dyn_cast(C.getNode())) { + const APInt &Imm = ConstNode->getAPIntValue(); + // Convert MULT to LSL. 
+ if (Imm.isPowerOf2() && Imm.isIntN(5)) + return true; } - return DAG.getNode(ISD::SHL, DL, VT, Op->getOperand(0), - DAG.getConstant(ShiftAmt, DL, VT)); + return false; } SDValue XtensaTargetLowering::LowerOperation(SDValue Op, @@ -899,8 +888,6 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, return LowerJumpTable(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::MUL: - return LowerMUL(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::STACKSAVE: diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index 8e18b50f211da..861c3c58847a1 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -41,8 +41,10 @@ enum { // condition code in op #4 SELECT_CC, - // Shift + // SRCL(R) performs shift left(right) of the concatenation of 2 registers + // and returns high(low) 32-bit part of 64-bit result SRCL, + // Shift Right Combined SRCR, }; } @@ -90,6 +92,9 @@ class XtensaTargetLowering : public TargetLowering { const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + bool decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const override; + const XtensaSubtarget &getSubtarget() const { return Subtarget; } MachineBasicBlock * @@ -111,8 +116,6 @@ class XtensaTargetLowering : public TargetLowering { SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll index 030f2a0fbfdc7..81e4a04c6d23e 100644 --- a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll @@ -282,10 +282,7 @@ define i16 @test_ctlz_i16(i16 %a) nounwind { define i32 
@test_ctlz_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_ctlz_i32: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill -; XTENSA-NEXT: or a8, a2, a2 +; XTENSA: or a8, a2, a2 ; XTENSA-NEXT: movi a2, 32 ; XTENSA-NEXT: beqz a8, .LBB8_2 ; XTENSA-NEXT: j .LBB8_1 @@ -316,17 +313,15 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI8_2 -; XTENSA-NEXT: and a2, a8, a9 -; XTENSA-NEXT: l32r a3, .LCPI8_3 -; XTENSA-NEXT: l32r a8, .LCPI8_4 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: movi a8, 24 -; XTENSA-NEXT: ssr a8 -; XTENSA-NEXT: srl a2, a2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: slli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: slli a9, a8, 16 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 24 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a2, a8 ; XTENSA-NEXT: .LBB8_2: # %cond.end -; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 -; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false) ret i32 %tmp @@ -410,10 +405,7 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind { define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-LABEL: test_ctlz_i32_zero_undef: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill -; XTENSA-NEXT: srli a8, a2, 1 +; XTENSA: srli a8, a2, 1 ; XTENSA-NEXT: or a8, a2, a8 ; XTENSA-NEXT: srli a9, a8, 2 ; XTENSA-NEXT: or a8, a8, a9 @@ -439,16 +431,14 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI11_2 -; XTENSA-NEXT: and a2, a8, a9 -; XTENSA-NEXT: l32r a3, .LCPI11_3 -; XTENSA-NEXT: l32r a8, .LCPI11_4 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: movi a8, 24 -; XTENSA-NEXT: ssr a8 -; XTENSA-NEXT: srl a2, a2 -; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded 
Reload -; XTENSA-NEXT: addi a8, a1, 16 -; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: slli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: slli a9, a8, 16 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 24 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a2, a8 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 true) ret i32 %tmp @@ -500,10 +490,7 @@ define i16 @test_ctpop_i16(i16 %a) nounwind { define i32 @test_ctpop_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_ctpop_i32: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill -; XTENSA-NEXT: srli a8, a2, 1 +; XTENSA: srli a8, a2, 1 ; XTENSA-NEXT: l32r a9, .LCPI14_0 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: sub a8, a2, a8 @@ -515,16 +502,14 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI14_2 -; XTENSA-NEXT: and a2, a8, a9 -; XTENSA-NEXT: l32r a3, .LCPI14_3 -; XTENSA-NEXT: l32r a8, .LCPI14_4 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: movi a8, 24 -; XTENSA-NEXT: ssr a8 -; XTENSA-NEXT: srl a2, a2 -; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 -; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: slli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: slli a9, a8, 16 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: movi a9, 24 +; XTENSA-NEXT: ssr a9 +; XTENSA-NEXT: srl a2, a8 ; XTENSA-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 diff --git a/llvm/test/CodeGen/Xtensa/mul.ll b/llvm/test/CodeGen/Xtensa/mul.ll index 0be2885458163..2a96f6ce52690 100644 --- a/llvm/test/CodeGen/Xtensa/mul.ll +++ b/llvm/test/CodeGen/Xtensa/mul.ll @@ -634,3 +634,27 @@ define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind { %r = or i8 %a, 240 ret i8 %r } + +define signext i32 @mul_imm_2(i32 %a) nounwind { +; XTENSA-LABEL: mul_imm_2: +; XTENSA: slli a2, a2, 1 +; XTENSA-NEXT: 
ret + %1 = mul i32 %a, 2 + ret i32 %1 +} + +define signext i32 @mul_imm_1024(i32 %a) nounwind { +; XTENSA-LABEL: mul_imm_1024: +; XTENSA: slli a2, a2, 10 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 1024 + ret i32 %1 +} + +define signext i32 @mul_imm_16384(i32 %a) nounwind { +; XTENSA-LABEL: mul_imm_16384: +; XTENSA: slli a2, a2, 14 +; XTENSA-NEXT: ret + %1 = mul i32 %a, 16384 + ret i32 %1 +} diff --git a/llvm/test/CodeGen/Xtensa/shift.ll b/llvm/test/CodeGen/Xtensa/shift.ll index acca8551fa621..85973e26c2ef4 100644 --- a/llvm/test/CodeGen/Xtensa/shift.ll +++ b/llvm/test/CodeGen/Xtensa/shift.ll @@ -1,6 +1,97 @@ ; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ ; RUN: | FileCheck %s +define i32 @lshl(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: lshl: +; CHECK: ssl a3 +; CHECK-NEXT: sll a2, a2 +; CHECK-NEXT: ret + %c = shl i32 %x, %y + ret i32 %c +} + +define i32 @lshl_imm_1(i32 %x) nounwind { +; CHECK-LABEL: lshl_imm_1: +; CHECK: slli a2, a2, 1 +; CHECK-NEXT: ret + %c = shl i32 %x, 1 + ret i32 %c +} + +define i32 @lshl_imm_10(i32 %x) nounwind { +; CHECK-LABEL: lshl_imm_10: +; CHECK: slli a2, a2, 10 +; CHECK-NEXT: ret + %c = shl i32 %x, 10 + ret i32 %c +} + +define i32 @lshl_imm_31(i32 %x) nounwind { +; CHECK-LABEL: lshl_imm_31: +; CHECK: slli a2, a2, 31 +; CHECK-NEXT: ret + %c = shl i32 %x, 31 + ret i32 %c +} + +define i32 @lshr(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: lshr: +; CHECK: ssr a3 +; CHECK-NEXT: srl a2, a2 +; CHECK-NEXT: ret + %c = lshr i32 %x, %y + ret i32 %c +} + +define i32 @lshr_imm_1(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: lshr_imm_1: +; CHECK: srli a2, a2, 1 +; CHECK-NEXT: ret + %c = lshr i32 %x, 1 + ret i32 %c +} + +define i32 @lshr_imm_15(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: lshr_imm_15: +; CHECK: srli a2, a2, 15 +; CHECK-NEXT: ret + %c = lshr i32 %x, 15 + ret i32 %c +} + +define i32 @ashr(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: ashr: +; CHECK: ssr a3 +; CHECK-NEXT: sra a2, a2 +; CHECK-NEXT: ret + %c = ashr i32 %x, %y + ret i32 %c +} + 
+define i32 @ashr_imm_1(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: ashr_imm_1: +; CHECK: srai a2, a2, 1 +; CHECK-NEXT: ret + %c = ashr i32 %x, 1 + ret i32 %c +} + +define i32 @ashr_imm_10(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: ashr_imm_10: +; CHECK: srai a2, a2, 10 +; CHECK-NEXT: ret + %c = ashr i32 %x, 10 + ret i32 %c +} + +define i32 @ashr_imm_31(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: ashr_imm_31: +; CHECK: srai a2, a2, 31 +; CHECK-NEXT: ret + %c = ashr i32 %x, 31 + ret i32 %c +} + define i64 @lshl_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: lshl_64: ; CHECK: ssl a4 @@ -9,16 +100,16 @@ define i64 @lshl_64(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: ssl a8 ; CHECK-NEXT: sll a10, a2 ; CHECK-NEXT: movi a9, 0 -; CHECK-NEXT: blt a8, a9, .LBB0_2 +; CHECK-NEXT: blt a8, a9, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: or a3, a10, a10 -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: .LBB11_2: ; CHECK-NEXT: ssl a4 ; CHECK-NEXT: sll a2, a2 -; CHECK-NEXT: blt a8, a9, .LBB0_4 +; CHECK-NEXT: blt a8, a9, .LBB11_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: or a2, a9, a9 -; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: .LBB11_4: ; CHECK-NEXT: ret %c = shl i64 %x, %y ret i64 %c @@ -32,16 +123,16 @@ define i64 @lshr_64(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: ssr a8 ; CHECK-NEXT: srl a10, a3 ; CHECK-NEXT: movi a9, 0 -; CHECK-NEXT: blt a8, a9, .LBB1_2 +; CHECK-NEXT: blt a8, a9, .LBB12_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: or a2, a10, a10 -; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: ssr a4 ; CHECK-NEXT: srl a3, a3 -; CHECK-NEXT: blt a8, a9, .LBB1_4 +; CHECK-NEXT: blt a8, a9, .LBB12_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: or a3, a9, a9 -; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: .LBB12_4: ; CHECK-NEXT: ret %c = lshr i64 %x, %y ret i64 %c @@ -55,16 +146,16 @@ define i64 @ashr_64(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: ssr a9 ; CHECK-NEXT: sra a8, a3 ; CHECK-NEXT: movi a10, 0 -; CHECK-NEXT: blt a9, a10, .LBB2_2 +; CHECK-NEXT: blt a9, a10, .LBB13_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: or a2, 
a8, a8 -; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: .LBB13_2: ; CHECK-NEXT: ssr a4 ; CHECK-NEXT: sra a8, a3 -; CHECK-NEXT: blt a9, a10, .LBB2_4 +; CHECK-NEXT: blt a9, a10, .LBB13_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: srai a8, a3, 31 -; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: .LBB13_4: ; CHECK-NEXT: or a3, a8, a8 ; CHECK-NEXT: ret %c = ashr i64 %x, %y From c4be288026d49c36c55c16c9282b03f226008171 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Tue, 6 Aug 2024 01:02:53 +0300 Subject: [PATCH 05/11] [Xtensa] Lower CTPOP operation. Implement lowering of the CTPOP operation. Also remove global variable creation code from LowerConstantPool function. Add vector case handling to the decomposeMulByConstant function. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 105 ++++-- llvm/lib/Target/Xtensa/XtensaISelLowering.h | 7 + llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 3 +- llvm/lib/Target/Xtensa/XtensaOperators.td | 5 + .../CodeGen/Xtensa/constantpool-aggregate.ll | 45 --- llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll | 346 ++++++++++-------- llvm/test/CodeGen/Xtensa/mul.ll | 11 + 7 files changed, 297 insertions(+), 225 deletions(-) delete mode 100644 llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index b87c081a62c96..5dbaa335f99fa 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -120,7 +120,7 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BSWAP, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTR, MVT::i32, Expand); - setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Custom); setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::CTLZ, MVT::i32, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); @@ -655,11 +655,13 @@ SDValue
XtensaTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); const DataLayout &TD = DAG.getDataLayout(); EVT PtrVT = Table.getValueType(); + unsigned EntrySize = MJTI->getEntrySize(TD); assert((MJTI->getEntrySize(TD) == 4) && "Unsupported jump-table entry size"); - Index = DAG.getNode(ISD::SHL, DL, Index.getValueType(), Index, - DAG.getConstant(2, DL, Index.getValueType())); + Index = DAG.getNode( + ISD::SHL, DL, Index.getValueType(), Index, + DAG.getConstant(Log2_32(EntrySize), DL, Index.getValueType())); SDValue Addr = DAG.getNode(ISD::ADD, DL, Index.getValueType(), Index, Table); SDValue LD = @@ -696,29 +698,13 @@ SDValue XtensaTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); - auto C = const_cast(CP->getConstVal()); - auto T = CP->getType(); SDValue Result; - // Do not use constant pool for aggregate or vector constant types, - // in such cases create global variable, for example to store tabel - // when we lower CTTZ operation. 
- if (T->isAggregateType()) { - MachineFunction &MF = DAG.getMachineFunction(); - auto AFI = MF.getInfo(); - auto M = const_cast(MF.getFunction().getParent()); - auto GV = new GlobalVariable( - *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C, - Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + - Twine(MF.getFunctionNumber()) + "_" + Twine(AFI->createLabelUId())); - Result = DAG.getTargetConstantPool(GV, PtrVT, Align(4)); + if (!CP->isMachineConstantPoolEntry()) { + Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), + CP->getOffset()); } else { - if (!CP->isMachineConstantPoolEntry()) { - Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, - CP->getAlign(), CP->getOffset()); - } else { - report_fatal_error("This constantpool type is not supported yet"); - } + report_fatal_error("This constantpool type is not supported yet"); } return getAddrPCRel(Result, DAG); @@ -854,21 +840,74 @@ SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue XtensaTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op->getValueType(0); + SDValue Val = Op.getOperand(0); + SDLoc DL(Op); + + if (VT != MVT::i32) + return SDValue(); + + // CTPOP expansion: + // Val = (Val - (Val >> 1)) & 0x55555555 + // Val = ((Val >> 2) & 0x33333333) + (Val & 0x33333333) + // Val = ((Val >> 4) + Val) & 0x0f0f0f0f + // Val = (Val >> 8) + Val + // Val = (extract bits [16, 20] from Val) + Val + // Val = extract bits [0, 5] from Val + + SDValue Mask = DAG.getConstant(0x55555555, DL, VT); + SDValue Shift = + DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(1, DL, VT)); + SDValue ShiftAndMask = DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + Val = DAG.getNode(ISD::SUB, DL, VT, Val, ShiftAndMask); + + Mask = DAG.getConstant(0x33333333, DL, VT); + Shift = DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(2, DL, VT)); + SDValue ValAndMask = DAG.getNode(ISD::AND, DL, VT, Val, 
Mask); + ShiftAndMask = DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + Val = DAG.getNode(ISD::ADD, DL, VT, ValAndMask, ShiftAndMask); + + Mask = DAG.getConstant(0x0f0f0f0f, DL, VT); + Shift = DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(4, DL, VT)); + Val = DAG.getNode(ISD::ADD, DL, VT, Val, Shift); + Val = DAG.getNode(ISD::AND, DL, VT, Val, Mask); + + Shift = DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(8, DL, VT)); + Val = DAG.getNode(ISD::ADD, DL, VT, Val, Shift); + + Shift = DAG.getNode(XtensaISD::EXTUI, DL, VT, Val, + DAG.getConstant(16, DL, VT), DAG.getConstant(5, DL, VT)); + Val = DAG.getNode(ISD::ADD, DL, VT, Val, Shift); + + return DAG.getNode(XtensaISD::EXTUI, DL, VT, Val, DAG.getConstant(0, DL, VT), + DAG.getConstant(6, DL, VT)); +} + bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const { - if (!VT.isScalarInteger()) + APInt Imm; + unsigned EltSizeInBits; + + if (ISD::isConstantSplatVector(C.getNode(), Imm)) { + EltSizeInBits = VT.getScalarSizeInBits(); + } else if (VT.isScalarInteger()) { + EltSizeInBits = VT.getSizeInBits(); + if (auto *ConstNode = dyn_cast(C.getNode())) + Imm = ConstNode->getAPIntValue(); + else + return false; + } else { return false; + } // Omit if data size exceeds. - if (VT.getSizeInBits() > 32) + if (EltSizeInBits > 32) return false; - if (auto *ConstNode = dyn_cast(C.getNode())) { - const APInt &Imm = ConstNode->getAPIntValue(); - // Convert MULT to LSL. - if (Imm.isPowerOf2() && Imm.isIntN(5)) - return true; - } + // Convert MULT to LSL. 
+ if (Imm.isPowerOf2() && Imm.isIntN(5)) + return true; return false; } @@ -886,6 +925,8 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, return LowerBlockAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::CTPOP: + return LowerCTPOP(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::SELECT_CC: @@ -913,6 +954,8 @@ const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const { return "XtensaISD::BR_JT"; case XtensaISD::CALL: return "XtensaISD::CALL"; + case XtensaISD::EXTUI: + return "XtensaISD::EXTUI"; case XtensaISD::PCREL_WRAPPER: return "XtensaISD::PCREL_WRAPPER"; case XtensaISD::RET: diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index 861c3c58847a1..8e7346b40dfe5 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -30,6 +30,11 @@ enum { // There is an optional glue operand at the end. CALL, + // Extract unsigned immediate. Operand 0 is value, operand 1 + // is bit position of the field [0..31], operand 2 is bit size + // of the field [1..16] + EXTUI, + // Wraps a TargetGlobalAddress that should be loaded using PC-relative // accesses. Operand 0 is the address. 
PCREL_WRAPPER, @@ -116,6 +121,8 @@ class XtensaTargetLowering : public TargetLowering { SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td index 18a31fef18446..a4c6d62f85769 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td @@ -138,7 +138,8 @@ let Defs = [SAR] in { } def EXTUI : RRR_Inst<0x00, 0x04, 0x00, (outs AR:$r), (ins AR:$t, uimm5:$imm1, imm1_16:$imm2), - "extui\t$r, $t, $imm1, $imm2", []> { + "extui\t$r, $t, $imm1, $imm2", + [(set AR:$r, (Xtensa_extui AR:$t, uimm5:$imm1, imm1_16:$imm2))]> { bits<5> imm1; bits<4> imm2; diff --git a/llvm/lib/Target/Xtensa/XtensaOperators.td b/llvm/lib/Target/Xtensa/XtensaOperators.td index c825359f3c5dd..3dd73b44f336a 100644 --- a/llvm/lib/Target/Xtensa/XtensaOperators.td +++ b/llvm/lib/Target/Xtensa/XtensaOperators.td @@ -28,6 +28,9 @@ def SDT_XtensaSelectCC : SDTypeProfile<1, 5, def SDT_XtensaSRC : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDT_XtensaEXTUI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + //===----------------------------------------------------------------------===// // Node definitions //===----------------------------------------------------------------------===// @@ -54,3 +57,5 @@ def Xtensa_select_cc: SDNode<"XtensaISD::SELECT_CC", SDT_XtensaSelectCC, def Xtensa_srcl: SDNode<"XtensaISD::SRCL", SDT_XtensaSRC>; def Xtensa_srcr: SDNode<"XtensaISD::SRCR", SDT_XtensaSRC>; + +def Xtensa_extui: SDNode<"XtensaISD::EXTUI", SDT_XtensaEXTUI>; diff --git a/llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll b/llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll 
deleted file mode 100644 index 3ace3a6d604b3..0000000000000 --- a/llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ -; RUN: | FileCheck %s - -; Check that we place const array (CTTZ lookup table) in global variable, -; instead of constant pool and place label to this table in constant pool. - -; CHECK: .literal_position -; CHECK-NEXT: .literal .LCPI0_0, 125613361 -; CHECK-NEXT: .literal .LCPI0_1, __mulsi3 -; CHECK-NEXT: .literal .LCPI0_2, .LCP0_0 -; CHECK-NEXT: .global test_cttz_i32 - -define i32 @test_cttz_i32(i32 %a) nounwind { -; XTENSA-LABEL: test_cttz_i32: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill -; XTENSA-NEXT: or a8, a2, a2 -; XTENSA-NEXT: movi a2, 32 -; XTENSA-NEXT: beqz a8, .LBB0_2 -; XTENSA-NEXT: j .LBB2_1 -; XTENSA-NEXT: .LBB2_1: # %cond.false -; XTENSA-NEXT: neg a9, a8 -; XTENSA-NEXT: and a2, a8, a9 -; XTENSA-NEXT: l32r a3, .LCPI0_0 -; XTENSA-NEXT: l32r a8, .LCPI0_1 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: movi a8, 27 -; XTENSA-NEXT: ssr a8 -; XTENSA-NEXT: srl a8, a2 -; XTENSA-NEXT: l32r a9, .LCPI0_2 -; XTENSA-NEXT: add a8, a9, a8 -; XTENSA-NEXT: l8ui a2, a8, 0 -; XTENSA-NEXT: .LBB2_2: # %cond.end -; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: ret - %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) - ret i32 %tmp -} - -; CHECK: .LCP0_0: -; CHECK-NEXT: .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t" -; CHECK-NEXT: .size .LCP0_0, 32 diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll index 81e4a04c6d23e..c1e590484e717 100644 --- a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll @@ -16,29 +16,35 @@ declare i64 @llvm.ctpop.i64(i64) define i8 
@test_cttz_i8(i8 %a) nounwind { ; XTENSA-LABEL: test_cttz_i8: -; XTENSA: movi a8, 255 -; XTENSA-NEXT: and a9, a2, a8 +; XTENSA: movi a9, 255 +; XTENSA-NEXT: and a10, a2, a9 ; XTENSA-NEXT: movi a8, 8 -; XTENSA-NEXT: beqz a9, .LBB0_2 +; XTENSA-NEXT: beqz a10, .LBB0_2 ; XTENSA-NEXT: j .LBB0_1 ; XTENSA-NEXT: .LBB0_1: # %cond.false ; XTENSA-NEXT: movi a8, -1 ; XTENSA-NEXT: xor a8, a2, a8 -; XTENSA-NEXT: addi a9, a2, -1 +; XTENSA-NEXT: addi a10, a2, -1 +; XTENSA-NEXT: and a8, a8, a10 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: movi a10, 85 +; XTENSA-NEXT: l32r a10, .LCPI0_0 ; XTENSA-NEXT: and a9, a9, a10 ; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: movi a9, 51 +; XTENSA-NEXT: l32r a9, .LCPI0_1 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: l32r a9, .LCPI0_2 ; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a8, a8, 0, 6 ; XTENSA-NEXT: .LBB0_2: # %cond.end ; XTENSA-NEXT: or a2, a8, a8 ; XTENSA-NEXT: ret @@ -48,15 +54,16 @@ define i8 @test_cttz_i8(i8 %a) nounwind { define i16 @test_cttz_i16(i16 %a) nounwind { ; XTENSA-LABEL: test_cttz_i16: -; XTENSA: l32r a8, .LCPI1_0 -; XTENSA-NEXT: and a9, a2, a8 +; XTENSA: l32r a9, .LCPI1_0 +; XTENSA-NEXT: and a10, a2, a9 ; XTENSA-NEXT: movi a8, 16 -; XTENSA-NEXT: beqz a9, .LBB1_2 +; XTENSA-NEXT: beqz a10, .LBB1_2 ; XTENSA-NEXT: j .LBB1_1 ; XTENSA-NEXT: .LBB1_1: # %cond.false ; XTENSA-NEXT: movi a8, -1 ; XTENSA-NEXT: xor a8, a2, a8 -; XTENSA-NEXT: addi a9, a2, -1 +; XTENSA-NEXT: addi a10, a2, -1 +; XTENSA-NEXT: and a8, a8, a10 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 ; XTENSA-NEXT: l32r a10, .LCPI1_1 @@ -69,12 +76,13 @@ define i16 @test_cttz_i16(i16 %a) nounwind { ; XTENSA-NEXT: add 
a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: l32r a10, .LCPI1_3 -; XTENSA-NEXT: and a8, a8, a10 -; XTENSA-NEXT: srli a8, a8, 8 -; XTENSA-NEXT: add a8, a9, a8 +; XTENSA-NEXT: l32r a9, .LCPI1_3 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a8, a8, 0, 6 ; XTENSA-NEXT: .LBB1_2: # %cond.end ; XTENSA-NEXT: or a2, a8, a8 ; XTENSA-NEXT: ret @@ -84,29 +92,34 @@ define i16 @test_cttz_i16(i16 %a) nounwind { define i32 @test_cttz_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_cttz_i32: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill -; XTENSA-NEXT: or a8, a2, a2 -; XTENSA-NEXT: movi a2, 32 -; XTENSA-NEXT: beqz a8, .LBB2_2 +; XTENSA: movi a8, 32 +; XTENSA-NEXT: beqz a2, .LBB2_2 ; XTENSA-NEXT: j .LBB2_1 ; XTENSA-NEXT: .LBB2_1: # %cond.false -; XTENSA-NEXT: neg a9, a8 -; XTENSA-NEXT: and a2, a8, a9 -; XTENSA-NEXT: l32r a3, .LCPI2_0 -; XTENSA-NEXT: l32r a8, .LCPI2_1 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: movi a8, 27 -; XTENSA-NEXT: ssr a8 -; XTENSA-NEXT: srl a8, a2 +; XTENSA-NEXT: movi a8, -1 +; XTENSA-NEXT: xor a8, a2, a8 +; XTENSA-NEXT: addi a9, a2, -1 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI2_0 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI2_1 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI2_2 -; XTENSA-NEXT: add a8, a9, a8 -; XTENSA-NEXT: l8ui a2, a8, 0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; 
XTENSA-NEXT: extui a8, a8, 0, 6 ; XTENSA-NEXT: .LBB2_2: # %cond.end -; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 -; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: or a2, a8, a8 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) ret i32 %tmp @@ -118,19 +131,26 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind { ; XTENSA-NEXT: xor a8, a2, a8 ; XTENSA-NEXT: addi a9, a2, -1 ; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: movi a9, 255 +; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: movi a10, 85 +; XTENSA-NEXT: l32r a10, .LCPI3_0 ; XTENSA-NEXT: and a9, a9, a10 ; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: movi a9, 51 +; XTENSA-NEXT: l32r a9, .LCPI3_1 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 -; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI3_2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: ret %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true) ret i8 %tmp @@ -142,23 +162,26 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind { ; XTENSA-NEXT: xor a8, a2, a8 ; XTENSA-NEXT: addi a9, a2, -1 ; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI4_0 +; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI4_0 +; XTENSA-NEXT: l32r a10, .LCPI4_1 ; XTENSA-NEXT: and a9, a9, a10 ; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI4_1 +; XTENSA-NEXT: l32r a9, .LCPI4_2 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 -; XTENSA-NEXT: and a9, a8, a9 -; 
XTENSA-NEXT: l32r a10, .LCPI4_2 -; XTENSA-NEXT: and a8, a8, a10 -; XTENSA-NEXT: srli a8, a8, 8 -; XTENSA-NEXT: add a2, a9, a8 +; XTENSA-NEXT: l32r a9, .LCPI4_3 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: ret %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true) ret i16 %tmp @@ -166,23 +189,28 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind { define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-LABEL: test_cttz_i32_zero_undef: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill -; XTENSA-NEXT: neg a8, a2 -; XTENSA-NEXT: and a2, a2, a8 -; XTENSA-NEXT: l32r a3, .LCPI5_0 -; XTENSA-NEXT: l32r a8, .LCPI5_1 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: movi a8, 27 -; XTENSA-NEXT: ssr a8 -; XTENSA-NEXT: srl a8, a2 +; XTENSA: movi a8, -1 +; XTENSA-NEXT: xor a8, a2, a8 +; XTENSA-NEXT: addi a9, a2, -1 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI5_0 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI5_1 +; XTENSA-NEXT: and a10, a8, a9 +; XTENSA-NEXT: srli a8, a8, 2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: add a8, a10, a8 +; XTENSA-NEXT: srli a9, a8, 4 +; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI5_2 -; XTENSA-NEXT: add a8, a9, a8 -; XTENSA-NEXT: l8ui a2, a8, 0 -; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 -; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true) ret i32 %tmp @@ -190,39 +218,45 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { define i8 
@test_ctlz_i8(i8 %a) nounwind { ; XTENSA-LABEL: test_ctlz_i8: -; XTENSA: movi a8, 255 -; XTENSA-NEXT: and a9, a2, a8 +; XTENSA: movi a9, 255 +; XTENSA-NEXT: and a10, a2, a9 ; XTENSA-NEXT: movi a8, 8 -; XTENSA-NEXT: beqz a9, .LBB6_2 +; XTENSA-NEXT: beqz a10, .LBB6_2 ; XTENSA-NEXT: j .LBB6_1 ; XTENSA-NEXT: .LBB6_1: # %cond.false ; XTENSA-NEXT: movi a8, 254 ; XTENSA-NEXT: and a8, a2, a8 ; XTENSA-NEXT: srli a8, a8, 1 ; XTENSA-NEXT: or a8, a2, a8 -; XTENSA-NEXT: movi a9, 252 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: srli a9, a9, 2 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: movi a9, 240 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: srli a9, a9, 4 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: movi a9, -1 -; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: movi a10, 252 +; XTENSA-NEXT: and a10, a8, a10 +; XTENSA-NEXT: srli a10, a10, 2 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: movi a10, 240 +; XTENSA-NEXT: and a10, a8, a10 +; XTENSA-NEXT: srli a10, a10, 4 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: movi a10, -1 +; XTENSA-NEXT: xor a8, a8, a10 +; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: movi a10, 85 +; XTENSA-NEXT: l32r a10, .LCPI6_0 ; XTENSA-NEXT: and a9, a9, a10 ; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: movi a9, 51 +; XTENSA-NEXT: l32r a9, .LCPI6_1 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 +; XTENSA-NEXT: l32r a9, .LCPI6_2 ; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a8, a8, 0, 6 ; XTENSA-NEXT: .LBB6_2: # %cond.end ; XTENSA-NEXT: or a2, a8, a8 ; XTENSA-NEXT: ret @@ -232,30 +266,31 @@ define i8 @test_ctlz_i8(i8 %a) nounwind { define i16 @test_ctlz_i16(i16 %a) nounwind { ; XTENSA-LABEL: test_ctlz_i16: -; XTENSA: l32r a8, 
.LCPI7_0 -; XTENSA-NEXT: and a9, a2, a8 +; XTENSA: l32r a9, .LCPI7_0 +; XTENSA-NEXT: and a10, a2, a9 ; XTENSA-NEXT: movi a8, 16 -; XTENSA-NEXT: beqz a9, .LBB7_2 +; XTENSA-NEXT: beqz a10, .LBB7_2 ; XTENSA-NEXT: j .LBB7_1 ; XTENSA-NEXT: .LBB7_1: # %cond.false ; XTENSA-NEXT: l32r a8, .LCPI7_1 ; XTENSA-NEXT: and a8, a2, a8 ; XTENSA-NEXT: srli a8, a8, 1 ; XTENSA-NEXT: or a8, a2, a8 -; XTENSA-NEXT: l32r a9, .LCPI7_2 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: srli a9, a9, 2 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI7_3 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: srli a9, a9, 4 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI7_4 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: srli a9, a9, 8 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: movi a9, -1 -; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: l32r a10, .LCPI7_2 +; XTENSA-NEXT: and a10, a8, a10 +; XTENSA-NEXT: srli a10, a10, 2 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: l32r a10, .LCPI7_3 +; XTENSA-NEXT: and a10, a8, a10 +; XTENSA-NEXT: srli a10, a10, 4 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: l32r a10, .LCPI7_4 +; XTENSA-NEXT: and a10, a8, a10 +; XTENSA-NEXT: srli a10, a10, 8 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: movi a10, -1 +; XTENSA-NEXT: xor a8, a8, a10 +; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 ; XTENSA-NEXT: l32r a10, .LCPI7_5 ; XTENSA-NEXT: and a9, a9, a10 @@ -267,12 +302,13 @@ define i16 @test_ctlz_i16(i16 %a) nounwind { ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: l32r a10, .LCPI7_7 -; XTENSA-NEXT: and a8, a8, a10 -; XTENSA-NEXT: srli a8, a8, 8 -; XTENSA-NEXT: add a8, a9, a8 +; XTENSA-NEXT: l32r a9, .LCPI7_7 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a8, a8, 0, 6 ; XTENSA-NEXT: .LBB7_2: 
# %cond.end ; XTENSA-NEXT: or a2, a8, a8 ; XTENSA-NEXT: ret @@ -314,13 +350,11 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI8_2 ; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: slli a9, a8, 8 +; XTENSA-NEXT: srli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: slli a9, a8, 16 +; XTENSA-NEXT: extui a9, a8, 16, 5 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 24 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a2, a8 +; XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: .LBB8_2: # %cond.end ; XTENSA-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false) @@ -343,19 +377,26 @@ define i8 @test_ctlz_i8_zero_undef(i8 %a) nounwind { ; XTENSA-NEXT: or a8, a8, a9 ; XTENSA-NEXT: movi a9, -1 ; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: movi a9, 255 +; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: movi a10, 85 +; XTENSA-NEXT: l32r a10, .LCPI9_0 ; XTENSA-NEXT: and a9, a9, a10 ; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: movi a9, 51 +; XTENSA-NEXT: l32r a9, .LCPI9_1 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 -; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI9_2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: ret %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 true) ret i8 %tmp @@ -381,23 +422,26 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind { ; XTENSA-NEXT: or a8, a8, a9 ; XTENSA-NEXT: movi a9, -1 ; XTENSA-NEXT: xor a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI10_4 +; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI10_4 +; XTENSA-NEXT: l32r a10, .LCPI10_5 ; XTENSA-NEXT: and a9, a9, a10 ; XTENSA-NEXT: sub a8, 
a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI10_5 +; XTENSA-NEXT: l32r a9, .LCPI10_6 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: l32r a10, .LCPI10_6 -; XTENSA-NEXT: and a8, a8, a10 -; XTENSA-NEXT: srli a8, a8, 8 -; XTENSA-NEXT: add a2, a9, a8 +; XTENSA-NEXT: l32r a9, .LCPI10_7 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: ret %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 true) ret i16 %tmp @@ -432,13 +476,11 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI11_2 ; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: slli a9, a8, 8 +; XTENSA-NEXT: srli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: slli a9, a8, 16 +; XTENSA-NEXT: extui a9, a8, 16, 5 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 24 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a2, a8 +; XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 true) ret i32 %tmp @@ -446,19 +488,26 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { define i8 @test_ctpop_i8(i8 %a) nounwind { ; XTENSA-LABEL: test_ctpop_i8: -; XTENSA: srli a8, a2, 1 -; XTENSA-NEXT: movi a9, 85 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: sub a8, a2, a8 -; XTENSA-NEXT: movi a9, 51 +; XTENSA: movi a8, 255 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI12_0 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI12_1 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add 
a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 -; XTENSA-NEXT: and a2, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI12_2 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: ret %1 = call i8 @llvm.ctpop.i8(i8 %a) ret i8 %1 @@ -466,23 +515,26 @@ define i8 @test_ctpop_i8(i8 %a) nounwind { define i16 @test_ctpop_i16(i16 %a) nounwind { ; XTENSA-LABEL: test_ctpop_i16: -; XTENSA: srli a8, a2, 1 -; XTENSA-NEXT: l32r a9, .LCPI13_0 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: sub a8, a2, a8 -; XTENSA-NEXT: l32r a9, .LCPI13_1 +; XTENSA: l32r a8, .LCPI13_0 +; XTENSA-NEXT: and a8, a2, a8 +; XTENSA-NEXT: srli a9, a8, 1 +; XTENSA-NEXT: l32r a10, .LCPI13_1 +; XTENSA-NEXT: and a9, a9, a10 +; XTENSA-NEXT: sub a8, a8, a9 +; XTENSA-NEXT: l32r a9, .LCPI13_2 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 15 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: l32r a10, .LCPI13_2 -; XTENSA-NEXT: and a8, a8, a10 -; XTENSA-NEXT: srli a8, a8, 8 -; XTENSA-NEXT: add a2, a9, a8 +; XTENSA-NEXT: l32r a9, .LCPI13_3 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: add a8, a8, a9 +; XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: ret %1 = call i16 @llvm.ctpop.i16(i16 %a) ret i16 %1 @@ -503,13 +555,11 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI14_2 ; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: slli a9, a8, 8 +; XTENSA-NEXT: srli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: slli a9, a8, 16 +; XTENSA-NEXT: extui a9, a8, 16, 5 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: movi a9, 24 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a2, a8 +; 
XTENSA-NEXT: extui a2, a8, 0, 6 ; XTENSA-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 diff --git a/llvm/test/CodeGen/Xtensa/mul.ll b/llvm/test/CodeGen/Xtensa/mul.ll index 2a96f6ce52690..08b4b1f57166a 100644 --- a/llvm/test/CodeGen/Xtensa/mul.ll +++ b/llvm/test/CodeGen/Xtensa/mul.ll @@ -658,3 +658,14 @@ define signext i32 @mul_imm_16384(i32 %a) nounwind { %1 = mul i32 %a, 16384 ret i32 %1 } + +define <4 x i32> @mul_vec_splat_constant(<4 x i32> %a) { +; XTENSA-LABEL: mul_vec_splat_constant: +; XTENSA: slli a2, a2, 2 +; XTENSA-NEXT: slli a3, a3, 2 +; XTENSA-NEXT: slli a4, a4, 2 +; XTENSA-NEXT: slli a5, a5, 2 +; XTENSA-NEXT: ret + %mul = mul <4 x i32> %a, + ret <4 x i32> %mul +} From 81356443adb0c8974342712265940fc759166262 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Sun, 11 Aug 2024 13:01:30 +0300 Subject: [PATCH 06/11] [Xtensa] Move SHL/SRL/SRA/SRC lowering from ISelLowering to ISelDagToDag. Remove SHL/SRL/SRA/SRC pseudo operations. Remove redundant cttz/ctlz/ctpop tests. Remove Xtensa MachineFunctionInfo implementation. 
--- llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp | 63 +++ llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 60 --- llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 24 - .../Target/Xtensa/XtensaMachineFunctionInfo.h | 53 --- .../lib/Target/Xtensa/XtensaTargetMachine.cpp | 7 - llvm/lib/Target/Xtensa/XtensaTargetMachine.h | 4 - llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll | 429 +----------------- 7 files changed, 84 insertions(+), 556 deletions(-) delete mode 100644 llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp index 145f285036358..869db1c7fcd5a 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp @@ -125,6 +125,7 @@ FunctionPass *llvm::createXtensaISelDag(XtensaTargetMachine &TM, void XtensaDAGToDAGISel::Select(SDNode *Node) { SDLoc DL(Node); + EVT VT = Node->getValueType(0); // If we have a custom node, we already have selected! 
if (Node->isMachineOpcode()) { @@ -132,5 +133,67 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { return; } + switch (Node->getOpcode()) { + case ISD::SHL: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + if (!isa(N1)) { + SDNode *SSL = CurDAG->getMachineNode(Xtensa::SSL, DL, MVT::Glue, N1); + SDNode *SLL = + CurDAG->getMachineNode(Xtensa::SLL, DL, VT, N0, SDValue(SSL, 0)); + ReplaceNode(Node, SLL); + return; + } + break; + } + case ISD::SRL: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + auto *C = dyn_cast(N1); + // If C is constant in range [0..15] then we can generate SRLI + // instruction using pattern matching, otherwise generate SRL + if (!C || !isUInt<4>(C->getZExtValue())) { + SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1); + SDNode *SRL = + CurDAG->getMachineNode(Xtensa::SRL, DL, VT, N0, SDValue(SSR, 0)); + ReplaceNode(Node, SRL); + return; + } + break; + } + case ISD::SRA: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + if (!isa(N1)) { + SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1); + SDNode *SRA = + CurDAG->getMachineNode(Xtensa::SRA, DL, VT, N0, SDValue(SSR, 0)); + ReplaceNode(Node, SRA); + return; + } + break; + } + case XtensaISD::SRCL: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + SDNode *SSL = CurDAG->getMachineNode(Xtensa::SSL, DL, MVT::Glue, N2); + SDNode *SRC = + CurDAG->getMachineNode(Xtensa::SRC, DL, VT, N0, N1, SDValue(SSL, 0)); + ReplaceNode(Node, SRC); + return; + } + case XtensaISD::SRCR: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N2); + SDNode *SRC = + CurDAG->getMachineNode(Xtensa::SRC, DL, VT, N0, N1, SDValue(SSR, 0)); + ReplaceNode(Node, SRC); + return; + } + } + SelectCode(Node); } diff --git 
a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 5dbaa335f99fa..11827021db32f 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -13,7 +13,6 @@ #include "XtensaISelLowering.h" #include "XtensaConstantPoolValue.h" -#include "XtensaMachineFunctionInfo.h" #include "XtensaSubtarget.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -1040,70 +1039,11 @@ XtensaTargetLowering::emitSelectCC(MachineInstr &MI, MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { - const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); switch (MI.getOpcode()) { case Xtensa::SELECT: return emitSelectCC(MI, MBB); - case Xtensa::SHL_P: { - MachineOperand &R = MI.getOperand(0); - MachineOperand &S = MI.getOperand(1); - MachineOperand &SA = MI.getOperand(2); - - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSL)).addReg(SA.getReg()); - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SLL), R.getReg()).addReg(S.getReg()); - MI.eraseFromParent(); - return MBB; - } - case Xtensa::SRA_P: { - MachineOperand &R = MI.getOperand(0); - MachineOperand &T = MI.getOperand(1); - MachineOperand &SA = MI.getOperand(2); - - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSR)).addReg(SA.getReg()); - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRA), R.getReg()).addReg(T.getReg()); - MI.eraseFromParent(); - return MBB; - } - case Xtensa::SRL_P: { - MachineOperand &R = MI.getOperand(0); - MachineOperand &T = MI.getOperand(1); - MachineOperand &SA = MI.getOperand(2); - - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSR)).addReg(SA.getReg()); - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRL), R.getReg()).addReg(T.getReg()); - MI.eraseFromParent(); - return MBB; - } - case Xtensa::SRCL_P: { - MachineOperand &R = MI.getOperand(0); - MachineOperand &HI = MI.getOperand(1); - MachineOperand &LO = MI.getOperand(2); - 
MachineOperand &SA = MI.getOperand(3); - - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSL)).addReg(SA.getReg()); - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRC), R.getReg()) - .addReg(HI.getReg()) - .addReg(LO.getReg()); - ; - MI.eraseFromParent(); - return MBB; - } - case Xtensa::SRCR_P: { - MachineOperand &R = MI.getOperand(0); - MachineOperand &HI = MI.getOperand(1); - MachineOperand &LO = MI.getOperand(2); - MachineOperand &SA = MI.getOperand(3); - - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSR)).addReg(SA.getReg()); - BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRC), R.getReg()) - .addReg(HI.getReg()) - .addReg(LO.getReg()); - ; - MI.eraseFromParent(); - return MBB; - } default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td index a4c6d62f85769..0d01864b54bc3 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td @@ -190,30 +190,6 @@ def SSAI : RRR_Inst<0x00, 0x00, 0x04, (outs), (ins uimm5:$imm), let t{0} = imm{4}; } -// Shift Pseudo instructions: -// SSL/SSR + Shift combination -let usesCustomInserter = 1 in { - def SHL_P : Pseudo<(outs AR:$r), (ins AR:$s, AR:$sa), - "# SHL_P $r, $s, $sa", - [(set i32:$r, (shl i32:$s, i32:$sa))]>; - - def SRA_P : Pseudo<(outs AR:$r), (ins AR:$t, AR:$sa), - "# SRA_P $r, $t, $sa", - [(set i32:$r, (sra i32:$t, i32:$sa))]>; - - def SRL_P : Pseudo<(outs AR:$r), (ins AR:$t, AR:$sa), - "# SRL_P $r, $t, $sa", - [(set i32:$r, (srl i32:$t, i32:$sa))]>; - - def SRCL_P : Pseudo<(outs AR:$r), (ins AR:$hi, AR:$lo, AR:$sa), - "# SRCL_P $r, $hi, $lo, $sa", - [(set i32:$r, (Xtensa_srcl i32:$hi, i32:$lo, i32:$sa))]>; - - def SRCR_P : Pseudo<(outs AR:$r), (ins AR:$hi, AR:$lo, AR:$sa), - "# SRCR_P $r, $hi, $lo, $sa", - [(set i32:$r, (Xtensa_srcr i32:$hi, i32:$lo, i32:$sa))]>; -} - //===----------------------------------------------------------------------===// // Load and store instructions 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h deleted file mode 100644 index 86ee81128c34c..0000000000000 --- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h +++ /dev/null @@ -1,53 +0,0 @@ -//==- XtensaMachineFunctionInfo.h - Xtensa machine function info --*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares Xtensa-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H -#define LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H - -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - -class XtensaFunctionInfo : public MachineFunctionInfo { - unsigned VarArgsFirstGPR; - int VarArgsStackOffset; - unsigned VarArgsFrameIndex; - bool SaveFrameRegister = false; - unsigned LabelUId = 0; - -public: - explicit XtensaFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) - : VarArgsFirstGPR(0), VarArgsStackOffset(0), VarArgsFrameIndex(0) {} - - unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } - void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } - - int getVarArgsStackOffset() const { return VarArgsStackOffset; } - void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; } - - // Get and set the frame index of the first stack vararg. 
- unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } - void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } - - bool isSaveFrameRegister() const { return SaveFrameRegister; } - void setSaveFrameRegister() { SaveFrameRegister = true; } - - unsigned createLabelUId() { return LabelUId++; } -}; - -} // namespace llvm - -#endif /* LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H */ diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp index eba169a2fe7a9..49c7faf84df1d 100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp @@ -14,7 +14,6 @@ #include "XtensaTargetMachine.h" #include "TargetInfo/XtensaTargetInfo.h" -#include "XtensaMachineFunctionInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -84,12 +83,6 @@ XtensaTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } -MachineFunctionInfo *XtensaTargetMachine::createMachineFunctionInfo( - BumpPtrAllocator &Allocator, const Function &F, - const TargetSubtargetInfo *STI) const { - return XtensaFunctionInfo::create(Allocator, F, STI); -} - namespace { /// Xtensa Code Generator Pass Configuration Options. 
class XtensaPassConfig : public TargetPassConfig { diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h index 6975076b5d699..f371f22ed3d0e 100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h @@ -45,10 +45,6 @@ class XtensaTargetMachine : public LLVMTargetMachine { return TLOF.get(); } - MachineFunctionInfo * - createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, - const TargetSubtargetInfo *STI) const override; - protected: mutable StringMap> SubtargetMap; }; diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll index c1e590484e717..83dbd1265a0fb 100644 --- a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll @@ -1,31 +1,19 @@ ; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=XTENSA %s -declare i8 @llvm.cttz.i8(i8, i1) -declare i16 @llvm.cttz.i16(i16, i1) declare i32 @llvm.cttz.i32(i32, i1) -declare i64 @llvm.cttz.i64(i64, i1) -declare i8 @llvm.ctlz.i8(i8, i1) -declare i16 @llvm.ctlz.i16(i16, i1) declare i32 @llvm.ctlz.i32(i32, i1) -declare i64 @llvm.ctlz.i64(i64, i1) -declare i8 @llvm.ctpop.i8(i8) -declare i16 @llvm.ctpop.i16(i16) declare i32 @llvm.ctpop.i32(i32) -declare i64 @llvm.ctpop.i64(i64) -define i8 @test_cttz_i8(i8 %a) nounwind { -; XTENSA-LABEL: test_cttz_i8: -; XTENSA: movi a9, 255 -; XTENSA-NEXT: and a10, a2, a9 -; XTENSA-NEXT: movi a8, 8 -; XTENSA-NEXT: beqz a10, .LBB0_2 +define i32 @test_cttz_i32(i32 %a) nounwind { +; XTENSA-LABEL: test_cttz_i32: +; XTENSA: movi a8, 32 +; XTENSA-NEXT: beqz a2, .LBB0_2 ; XTENSA-NEXT: j .LBB0_1 ; XTENSA-NEXT: .LBB0_1: # %cond.false ; XTENSA-NEXT: movi a8, -1 ; XTENSA-NEXT: xor a8, a2, a8 -; XTENSA-NEXT: addi a10, a2, -1 -; XTENSA-NEXT: and a8, a8, a10 +; XTENSA-NEXT: addi a9, a2, -1 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 ; XTENSA-NEXT: l32r 
a10, .LCPI0_0 @@ -47,146 +35,11 @@ define i8 @test_cttz_i8(i8 %a) nounwind { ; XTENSA-NEXT: extui a8, a8, 0, 6 ; XTENSA-NEXT: .LBB0_2: # %cond.end ; XTENSA-NEXT: or a2, a8, a8 -; XTENSA-NEXT: ret - %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false) - ret i8 %tmp -} - -define i16 @test_cttz_i16(i16 %a) nounwind { -; XTENSA-LABEL: test_cttz_i16: -; XTENSA: l32r a9, .LCPI1_0 -; XTENSA-NEXT: and a10, a2, a9 -; XTENSA-NEXT: movi a8, 16 -; XTENSA-NEXT: beqz a10, .LBB1_2 -; XTENSA-NEXT: j .LBB1_1 -; XTENSA-NEXT: .LBB1_1: # %cond.false -; XTENSA-NEXT: movi a8, -1 -; XTENSA-NEXT: xor a8, a2, a8 -; XTENSA-NEXT: addi a10, a2, -1 -; XTENSA-NEXT: and a8, a8, a10 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI1_1 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI1_2 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: add a8, a10, a8 -; XTENSA-NEXT: srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI1_3 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a8, a8, 0, 6 -; XTENSA-NEXT: .LBB1_2: # %cond.end -; XTENSA-NEXT: or a2, a8, a8 -; XTENSA-NEXT: ret - %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false) - ret i16 %tmp -} - -define i32 @test_cttz_i32(i32 %a) nounwind { -; XTENSA-LABEL: test_cttz_i32: -; XTENSA: movi a8, 32 -; XTENSA-NEXT: beqz a2, .LBB2_2 -; XTENSA-NEXT: j .LBB2_1 -; XTENSA-NEXT: .LBB2_1: # %cond.false -; XTENSA-NEXT: movi a8, -1 -; XTENSA-NEXT: xor a8, a2, a8 -; XTENSA-NEXT: addi a9, a2, -1 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI2_0 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI2_1 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 
-; XTENSA-NEXT: add a8, a10, a8 -; XTENSA-NEXT: srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI2_2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a8, a8, 0, 6 -; XTENSA-NEXT: .LBB2_2: # %cond.end -; XTENSA-NEXT: or a2, a8, a8 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) ret i32 %tmp } -define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind { -; XTENSA-LABEL: test_cttz_i8_zero_undef: -; XTENSA: movi a8, -1 -; XTENSA-NEXT: xor a8, a2, a8 -; XTENSA-NEXT: addi a9, a2, -1 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: movi a9, 255 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI3_0 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI3_1 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: add a8, a10, a8 -; XTENSA-NEXT: srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI3_2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a2, a8, 0, 6 -; XTENSA-NEXT: ret - %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true) - ret i8 %tmp -} - -define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind { -; XTENSA-LABEL: test_cttz_i16_zero_undef: -; XTENSA: movi a8, -1 -; XTENSA-NEXT: xor a8, a2, a8 -; XTENSA-NEXT: addi a9, a2, -1 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI4_0 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI4_1 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI4_2 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: add a8, a10, a8 -; XTENSA-NEXT: 
srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI4_3 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a2, a8, 0, 6 -; XTENSA-NEXT: ret - %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true) - ret i16 %tmp -} - define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-LABEL: test_cttz_i32_zero_undef: ; XTENSA: movi a8, -1 @@ -194,17 +47,17 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-NEXT: addi a9, a2, -1 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI5_0 +; XTENSA-NEXT: l32r a10, .LCPI1_0 ; XTENSA-NEXT: and a9, a9, a10 ; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI5_1 +; XTENSA-NEXT: l32r a9, .LCPI1_1 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI5_2 +; XTENSA-NEXT: l32r a9, .LCPI1_2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 @@ -216,113 +69,13 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { ret i32 %tmp } -define i8 @test_ctlz_i8(i8 %a) nounwind { -; XTENSA-LABEL: test_ctlz_i8: -; XTENSA: movi a9, 255 -; XTENSA-NEXT: and a10, a2, a9 -; XTENSA-NEXT: movi a8, 8 -; XTENSA-NEXT: beqz a10, .LBB6_2 -; XTENSA-NEXT: j .LBB6_1 -; XTENSA-NEXT: .LBB6_1: # %cond.false -; XTENSA-NEXT: movi a8, 254 -; XTENSA-NEXT: and a8, a2, a8 -; XTENSA-NEXT: srli a8, a8, 1 -; XTENSA-NEXT: or a8, a2, a8 -; XTENSA-NEXT: movi a10, 252 -; XTENSA-NEXT: and a10, a8, a10 -; XTENSA-NEXT: srli a10, a10, 2 -; XTENSA-NEXT: or a8, a8, a10 -; XTENSA-NEXT: movi a10, 240 -; XTENSA-NEXT: and a10, a8, a10 -; XTENSA-NEXT: srli a10, a10, 4 -; XTENSA-NEXT: or a8, a8, a10 -; XTENSA-NEXT: movi a10, -1 -; XTENSA-NEXT: xor a8, a8, a10 -; XTENSA-NEXT: and 
a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI6_0 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI6_1 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: add a8, a10, a8 -; XTENSA-NEXT: srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI6_2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a8, a8, 0, 6 -; XTENSA-NEXT: .LBB6_2: # %cond.end -; XTENSA-NEXT: or a2, a8, a8 -; XTENSA-NEXT: ret - %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 false) - ret i8 %tmp -} - -define i16 @test_ctlz_i16(i16 %a) nounwind { -; XTENSA-LABEL: test_ctlz_i16: -; XTENSA: l32r a9, .LCPI7_0 -; XTENSA-NEXT: and a10, a2, a9 -; XTENSA-NEXT: movi a8, 16 -; XTENSA-NEXT: beqz a10, .LBB7_2 -; XTENSA-NEXT: j .LBB7_1 -; XTENSA-NEXT: .LBB7_1: # %cond.false -; XTENSA-NEXT: l32r a8, .LCPI7_1 -; XTENSA-NEXT: and a8, a2, a8 -; XTENSA-NEXT: srli a8, a8, 1 -; XTENSA-NEXT: or a8, a2, a8 -; XTENSA-NEXT: l32r a10, .LCPI7_2 -; XTENSA-NEXT: and a10, a8, a10 -; XTENSA-NEXT: srli a10, a10, 2 -; XTENSA-NEXT: or a8, a8, a10 -; XTENSA-NEXT: l32r a10, .LCPI7_3 -; XTENSA-NEXT: and a10, a8, a10 -; XTENSA-NEXT: srli a10, a10, 4 -; XTENSA-NEXT: or a8, a8, a10 -; XTENSA-NEXT: l32r a10, .LCPI7_4 -; XTENSA-NEXT: and a10, a8, a10 -; XTENSA-NEXT: srli a10, a10, 8 -; XTENSA-NEXT: or a8, a8, a10 -; XTENSA-NEXT: movi a10, -1 -; XTENSA-NEXT: xor a8, a8, a10 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI7_5 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI7_6 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: add a8, a10, a8 -; XTENSA-NEXT: srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r 
a9, .LCPI7_7 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a8, a8, 0, 6 -; XTENSA-NEXT: .LBB7_2: # %cond.end -; XTENSA-NEXT: or a2, a8, a8 -; XTENSA-NEXT: ret - %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 false) - ret i16 %tmp -} - define i32 @test_ctlz_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_ctlz_i32: ; XTENSA: or a8, a2, a2 ; XTENSA-NEXT: movi a2, 32 -; XTENSA-NEXT: beqz a8, .LBB8_2 -; XTENSA-NEXT: j .LBB8_1 -; XTENSA-NEXT: .LBB8_1: # %cond.false +; XTENSA-NEXT: beqz a8, .LBB2_2 +; XTENSA-NEXT: j .LBB2_1 +; XTENSA-NEXT: .LBB2_1: # %cond.false ; XTENSA-NEXT: srli a9, a8, 1 ; XTENSA-NEXT: or a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 2 @@ -338,115 +91,29 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; XTENSA-NEXT: movi a9, -1 ; XTENSA-NEXT: xor a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI8_0 +; XTENSA-NEXT: l32r a10, .LCPI2_0 ; XTENSA-NEXT: and a9, a9, a10 ; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI8_1 +; XTENSA-NEXT: l32r a9, .LCPI2_1 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI8_2 +; XTENSA-NEXT: l32r a9, .LCPI2_2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: extui a9, a8, 16, 5 ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: extui a2, a8, 0, 6 -; XTENSA-NEXT: .LBB8_2: # %cond.end +; XTENSA-NEXT: .LBB2_2: # %cond.end ; XTENSA-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false) ret i32 %tmp } -define i8 @test_ctlz_i8_zero_undef(i8 %a) nounwind { -; XTENSA-LABEL: test_ctlz_i8_zero_undef: -; XTENSA: movi a8, 254 -; XTENSA-NEXT: and a8, a2, a8 -; XTENSA-NEXT: srli a8, a8, 1 -; XTENSA-NEXT: or a8, a2, a8 -; XTENSA-NEXT: movi a9, 252 -; XTENSA-NEXT: and a9, a8, a9 
-; XTENSA-NEXT: srli a9, a9, 2 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: movi a9, 240 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: srli a9, a9, 4 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: movi a9, -1 -; XTENSA-NEXT: xor a8, a8, a9 -; XTENSA-NEXT: movi a9, 255 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI9_0 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI9_1 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: add a8, a10, a8 -; XTENSA-NEXT: srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI9_2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a2, a8, 0, 6 -; XTENSA-NEXT: ret - %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 true) - ret i8 %tmp -} - -define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind { -; XTENSA-LABEL: test_ctlz_i16_zero_undef: -; XTENSA: l32r a8, .LCPI10_0 -; XTENSA-NEXT: and a8, a2, a8 -; XTENSA-NEXT: srli a8, a8, 1 -; XTENSA-NEXT: or a8, a2, a8 -; XTENSA-NEXT: l32r a9, .LCPI10_1 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: srli a9, a9, 2 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI10_2 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: srli a9, a9, 4 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI10_3 -; XTENSA-NEXT: and a9, a8, a9 -; XTENSA-NEXT: srli a9, a9, 8 -; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: movi a9, -1 -; XTENSA-NEXT: xor a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI10_4 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI10_5 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI10_6 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: add a8, a10, a8 -; 
XTENSA-NEXT: srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI10_7 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a2, a8, 0, 6 -; XTENSA-NEXT: ret - %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 true) - ret i16 %tmp -} - define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-LABEL: test_ctlz_i32_zero_undef: ; XTENSA: srli a8, a2, 1 @@ -464,17 +131,17 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-NEXT: movi a9, -1 ; XTENSA-NEXT: xor a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI11_0 +; XTENSA-NEXT: l32r a10, .LCPI3_0 ; XTENSA-NEXT: and a9, a9, a10 ; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI11_1 +; XTENSA-NEXT: l32r a9, .LCPI3_1 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI11_2 +; XTENSA-NEXT: l32r a9, .LCPI3_2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 @@ -486,74 +153,20 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ret i32 %tmp } -define i8 @test_ctpop_i8(i8 %a) nounwind { -; XTENSA-LABEL: test_ctpop_i8: -; XTENSA: movi a8, 255 -; XTENSA-NEXT: and a8, a2, a8 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI12_0 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI12_1 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: add a8, a10, a8 -; XTENSA-NEXT: srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI12_2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: 
extui a2, a8, 0, 6 -; XTENSA-NEXT: ret - %1 = call i8 @llvm.ctpop.i8(i8 %a) - ret i8 %1 -} - -define i16 @test_ctpop_i16(i16 %a) nounwind { -; XTENSA-LABEL: test_ctpop_i16: -; XTENSA: l32r a8, .LCPI13_0 -; XTENSA-NEXT: and a8, a2, a8 -; XTENSA-NEXT: srli a9, a8, 1 -; XTENSA-NEXT: l32r a10, .LCPI13_1 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: sub a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI13_2 -; XTENSA-NEXT: and a10, a8, a9 -; XTENSA-NEXT: srli a8, a8, 2 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: add a8, a10, a8 -; XTENSA-NEXT: srli a9, a8, 4 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI13_3 -; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 -; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a2, a8, 0, 6 -; XTENSA-NEXT: ret - %1 = call i16 @llvm.ctpop.i16(i16 %a) - ret i16 %1 -} - define i32 @test_ctpop_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_ctpop_i32: ; XTENSA: srli a8, a2, 1 -; XTENSA-NEXT: l32r a9, .LCPI14_0 +; XTENSA-NEXT: l32r a9, .LCPI4_0 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: sub a8, a2, a8 -; XTENSA-NEXT: l32r a9, .LCPI14_1 +; XTENSA-NEXT: l32r a9, .LCPI4_1 ; XTENSA-NEXT: and a10, a8, a9 ; XTENSA-NEXT: srli a8, a8, 2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a10, a8 ; XTENSA-NEXT: srli a9, a8, 4 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: l32r a9, .LCPI14_2 +; XTENSA-NEXT: l32r a9, .LCPI4_2 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 From ccbeae9189ff21784de9f2d3d1994bbf50f6214b Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Mon, 12 Aug 2024 02:04:37 +0300 Subject: [PATCH 07/11] [Xtensa] Minor fixes --- llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp | 10 ++++++++-- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 15 +-------------- llvm/test/CodeGen/Xtensa/bswap.ll | 1 + llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll | 1 + llvm/test/CodeGen/Xtensa/div.ll | 1 + 
llvm/test/CodeGen/Xtensa/mul.ll | 1 + llvm/test/CodeGen/Xtensa/rotl-rotr.ll | 1 + llvm/test/CodeGen/Xtensa/shift.ll | 1 + 8 files changed, 15 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp index 869db1c7fcd5a..9f9fac26272de 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp @@ -137,7 +137,10 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { case ISD::SHL: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - if (!isa<ConstantSDNode>(N1)) { + auto *C = dyn_cast<ConstantSDNode>(N1); + // If C is constant in range [1..31] then we can generate SLLI + // instruction using pattern matching, otherwise generate SLL + if (!C || !(isUInt<5>(C->getZExtValue()) && !C->isZero())) { SDNode *SSL = CurDAG->getMachineNode(Xtensa::SSL, DL, MVT::Glue, N1); SDNode *SLL = CurDAG->getMachineNode(Xtensa::SLL, DL, VT, N0, SDValue(SSL, 0)); @@ -164,7 +167,10 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { case ISD::SRA: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - if (!isa<ConstantSDNode>(N1)) { + auto *C = dyn_cast<ConstantSDNode>(N1); + // If C is constant in range [0..31] then we can generate SRAI + // instruction using pattern matching, otherwise generate SRA + if (!C || !isUInt<5>(C->getZExtValue())) { SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1); SDNode *SRA = CurDAG->getMachineNode(Xtensa::SRA, DL, VT, N0, SDValue(SSR, 0)); diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 11827021db32f..61ddfb3f2641e 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -765,17 +765,12 @@ SDValue XtensaTargetLowering::LowerShiftLeftParts(SDValue Op, DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); - SDValue HiTrue = DAG.getNode(XtensaISD::SRCL, DL, VT, 
Hi, Lo, Shamt); - SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusRegisterSize); SDValue Cond = DAG.getSetCC(DL, VT, ShamtMinusRegisterSize, Zero, ISD::SETLT); - Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, LoTrue, Zero); - Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, HiTrue, HiFalse); return DAG.getMergeValues({Lo, Hi}, DL); @@ -806,21 +801,16 @@ SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op, // Hi = 0; unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; - SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT); SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT); SDValue ShamtMinusRegisterSize = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize); SDValue LoTrue = DAG.getNode(XtensaISD::SRCR, DL, VT, Hi, Lo, Shamt); - SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); - SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusRegisterSize); - SDValue HiFalse; if (IsSRA) { @@ -830,13 +820,10 @@ SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op, } SDValue Cond = DAG.getSetCC(DL, VT, ShamtMinusRegisterSize, Zero, ISD::SETLT); - Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, LoTrue, LoFalse); - Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, HiTrue, HiFalse); - SDValue Ops[2] = {Lo, Hi}; - return DAG.getMergeValues(Ops, DL); + return DAG.getMergeValues({Lo, Hi}, DL); } SDValue XtensaTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/test/CodeGen/Xtensa/bswap.ll b/llvm/test/CodeGen/Xtensa/bswap.ll index e4458c7cf81c3..9f52de87236a1 100644 --- a/llvm/test/CodeGen/Xtensa/bswap.ll +++ b/llvm/test/CodeGen/Xtensa/bswap.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=XTENSA %s diff --git 
a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll index 83dbd1265a0fb..5494e5568906e 100644 --- a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=XTENSA %s diff --git a/llvm/test/CodeGen/Xtensa/div.ll b/llvm/test/CodeGen/Xtensa/div.ll index fcb58eb5bff53..883178acdf69a 100644 --- a/llvm/test/CodeGen/Xtensa/div.ll +++ b/llvm/test/CodeGen/Xtensa/div.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=XTENSA %s diff --git a/llvm/test/CodeGen/Xtensa/mul.ll b/llvm/test/CodeGen/Xtensa/mul.ll index 08b4b1f57166a..9b13897293dc1 100644 --- a/llvm/test/CodeGen/Xtensa/mul.ll +++ b/llvm/test/CodeGen/Xtensa/mul.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=XTENSA %s diff --git a/llvm/test/CodeGen/Xtensa/rotl-rotr.ll b/llvm/test/CodeGen/Xtensa/rotl-rotr.ll index 1dc52fbc94b41..350315e9aefda 100644 --- a/llvm/test/CodeGen/Xtensa/rotl-rotr.ll +++ b/llvm/test/CodeGen/Xtensa/rotl-rotr.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=XTENSA %s diff --git a/llvm/test/CodeGen/Xtensa/shift.ll b/llvm/test/CodeGen/Xtensa/shift.ll index 85973e26c2ef4..2b92b8032b626 100644 --- a/llvm/test/CodeGen/Xtensa/shift.ll +++ b/llvm/test/CodeGen/Xtensa/shift.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \ ; RUN: | FileCheck %s From 45b8d8639b91fb44788828894afc8d224902be1d Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Mon, 12 Aug 2024 18:10:15 +0300 Subject: [PATCH 08/11] [Xtensa] Improve CTPOP lowering. Minor fixes in SHL/SRC/SRA operations selection. --- llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp | 29 ++-- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 43 +----- llvm/test/CodeGen/Xtensa/bswap.ll | 128 ++++++++---------- llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll | 38 +++--- llvm/test/CodeGen/Xtensa/div.ll | 12 +- 5 files changed, 100 insertions(+), 150 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp index 9f9fac26272de..06a04ace59b0d 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp @@ -153,24 +153,33 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); auto *C = dyn_cast<ConstantSDNode>(N1); - // If C is constant in range [0..15] then we can generate SRLI - // instruction using pattern matching, otherwise generate SRL - if (!C || !isUInt<4>(C->getZExtValue())) { - SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1); - SDNode *SRL = - CurDAG->getMachineNode(Xtensa::SRL, DL, VT, N0, SDValue(SSR, 0)); - ReplaceNode(Node, SRL); + + // If C is constant then we can generate SRLI + // instruction using pattern matching or EXTUI, otherwise generate SRL + if (C) { + if (isUInt<4>(C->getZExtValue())) + break; + unsigned ShAmt = C->getZExtValue(); + SDNode *EXTUI = CurDAG->getMachineNode( + Xtensa::EXTUI, DL, VT, N0, CurDAG->getTargetConstant(ShAmt, DL, VT), + CurDAG->getTargetConstant(32 - ShAmt, DL, VT)); + ReplaceNode(Node, EXTUI); return; } - break; + + SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1); + SDNode *SRL = + 
CurDAG->getMachineNode(Xtensa::SRL, DL, VT, N0, SDValue(SSR, 0)); + ReplaceNode(Node, SRL); + return; } case ISD::SRA: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); auto *C = dyn_cast<ConstantSDNode>(N1); - // If C is constant in range [0..31] then we can generate SRAI + // If C is constant then we can generate SRAI // instruction using pattern matching, otherwise generate SRA - if (!C || !isUInt<5>(C->getZExtValue())) { + if (!C) { SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1); SDNode *SRA = CurDAG->getMachineNode(Xtensa::SRA, DL, VT, N0, SDValue(SSR, 0)); diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 61ddfb3f2641e..c7675c2f50176 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -827,47 +827,8 @@ SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op, } SDValue XtensaTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op->getValueType(0); - SDValue Val = Op.getOperand(0); - SDLoc DL(Op); - - if (VT != MVT::i32) - return SDValue(); - - // CTPOP expansion: - // Val = (Val - (Val >> 1)) & 0x55555555 - // Val = ((Val >> 2) & 0x33333333) + (Val & 0x33333333) - // Val = ((Val >> 4) + Val) & 0x0f0f0f0f - // Val = (Val >> 8) + Val - // Val = (extract bits [16, 20] from Val) + Val - // Val = extract bits [0, 5] from Val - - SDValue Mask = DAG.getConstant(0x55555555, DL, VT); - SDValue Shift = - DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(1, DL, VT)); - SDValue ShiftAndMask = DAG.getNode(ISD::AND, DL, VT, Shift, Mask); - Val = DAG.getNode(ISD::SUB, DL, VT, Val, ShiftAndMask); - - Mask = DAG.getConstant(0x33333333, DL, VT); - Shift = DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(2, DL, VT)); - SDValue ValAndMask = DAG.getNode(ISD::AND, DL, VT, Val, Mask); - ShiftAndMask = DAG.getNode(ISD::AND, DL, VT, Shift, Mask); - Val = DAG.getNode(ISD::ADD, DL, VT, ValAndMask, 
ShiftAndMask); - - Mask = DAG.getConstant(0x0f0f0f0f, DL, VT); - Shift = DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(4, DL, VT)); - Val = DAG.getNode(ISD::ADD, DL, VT, Val, Shift); - Val = DAG.getNode(ISD::AND, DL, VT, Val, Mask); - - Shift = DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(8, DL, VT)); - Val = DAG.getNode(ISD::ADD, DL, VT, Val, Shift); - - Shift = DAG.getNode(XtensaISD::EXTUI, DL, VT, Val, - DAG.getConstant(16, DL, VT), DAG.getConstant(5, DL, VT)); - Val = DAG.getNode(ISD::ADD, DL, VT, Val, Shift); - - return DAG.getNode(XtensaISD::EXTUI, DL, VT, Val, DAG.getConstant(0, DL, VT), - DAG.getConstant(6, DL, VT)); + auto &TLI = DAG.getTargetLoweringInfo(); + return TLI.expandCTPOP(Op.getNode(), DAG); } bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, diff --git a/llvm/test/CodeGen/Xtensa/bswap.ll b/llvm/test/CodeGen/Xtensa/bswap.ll index 9f52de87236a1..6a87aa84351cf 100644 --- a/llvm/test/CodeGen/Xtensa/bswap.ll +++ b/llvm/test/CodeGen/Xtensa/bswap.ll @@ -24,14 +24,12 @@ define i16 @test_bswap_i16(i16 %a) nounwind { define i32 @test_bswap_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_bswap_i32: -; XTENSA: movi a8, 24 -; XTENSA-NEXT: ssr a8 -; XTENSA-NEXT: srl a8, a2 -; XTENSA-NEXT: srli a9, a2, 8 -; XTENSA-NEXT: l32r a10, .LCPI1_0 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: or a8, a9, a8 -; XTENSA-NEXT: and a9, a2, a10 +; XTENSA: srli a8, a2, 8 +; XTENSA-NEXT: l32r a9, .LCPI1_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: extui a10, a2, 24, 8 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: and a9, a2, a9 ; XTENSA-NEXT: slli a9, a9, 8 ; XTENSA-NEXT: slli a10, a2, 24 ; XTENSA-NEXT: or a9, a10, a9 @@ -43,28 +41,25 @@ define i32 @test_bswap_i32(i32 %a) nounwind { define i64 @test_bswap_i64(i64 %a) nounwind { ; XTENSA-LABEL: test_bswap_i64: -; XTENSA: movi a9, 24 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a8, a3 -; XTENSA-NEXT: srli a10, a3, 8 -; XTENSA-NEXT: l32r a11, .LCPI2_0 -; XTENSA-NEXT: and a10, a10, a11 -; 
XTENSA-NEXT: or a8, a10, a8 -; XTENSA-NEXT: and a10, a3, a11 +; XTENSA: srli a8, a3, 8 +; XTENSA-NEXT: l32r a9, .LCPI2_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: extui a10, a3, 24, 8 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: and a10, a3, a9 ; XTENSA-NEXT: slli a10, a10, 8 -; XTENSA-NEXT: slli a7, a3, 24 -; XTENSA-NEXT: or a10, a7, a10 +; XTENSA-NEXT: slli a11, a3, 24 +; XTENSA-NEXT: or a10, a11, a10 ; XTENSA-NEXT: or a8, a10, a8 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a9, a2 ; XTENSA-NEXT: srli a10, a2, 8 -; XTENSA-NEXT: and a10, a10, a11 -; XTENSA-NEXT: or a9, a10, a9 -; XTENSA-NEXT: and a10, a2, a11 -; XTENSA-NEXT: slli a10, a10, 8 +; XTENSA-NEXT: and a10, a10, a9 +; XTENSA-NEXT: extui a11, a2, 24, 8 +; XTENSA-NEXT: or a10, a10, a11 +; XTENSA-NEXT: and a9, a2, a9 +; XTENSA-NEXT: slli a9, a9, 8 ; XTENSA-NEXT: slli a11, a2, 24 -; XTENSA-NEXT: or a10, a11, a10 -; XTENSA-NEXT: or a3, a10, a9 +; XTENSA-NEXT: or a9, a11, a9 +; XTENSA-NEXT: or a3, a9, a10 ; XTENSA-NEXT: or a2, a8, a8 ; XTENSA-NEXT: ret %tmp = call i64 @llvm.bswap.i64(i64 %a) @@ -129,14 +124,12 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind { define i32 @test_bitreverse_i32(i32 %a) nounwind { ; XTENSA-LABEL: test_bitreverse_i32: -; XTENSA: movi a8, 24 -; XTENSA-NEXT: ssr a8 -; XTENSA-NEXT: srl a8, a2 -; XTENSA-NEXT: srli a9, a2, 8 -; XTENSA-NEXT: l32r a10, .LCPI5_0 -; XTENSA-NEXT: and a9, a9, a10 -; XTENSA-NEXT: or a8, a9, a8 -; XTENSA-NEXT: and a9, a2, a10 +; XTENSA: srli a8, a2, 8 +; XTENSA-NEXT: l32r a9, .LCPI5_0 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: extui a10, a2, 24, 8 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: and a9, a2, a9 ; XTENSA-NEXT: slli a9, a9, 8 ; XTENSA-NEXT: slli a10, a2, 24 ; XTENSA-NEXT: or a9, a10, a9 @@ -166,45 +159,42 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind { define i64 @test_bitreverse_i64(i64 %a) nounwind { ; XTENSA-LABEL: test_bitreverse_i64: -; XTENSA: movi a10, 24 -; XTENSA-NEXT: ssr a10 -; XTENSA-NEXT: srl a8, a3 -; XTENSA-NEXT: srli 
a11, a3, 8 +; XTENSA: srli a8, a3, 8 ; XTENSA-NEXT: l32r a9, .LCPI6_0 -; XTENSA-NEXT: and a11, a11, a9 -; XTENSA-NEXT: or a8, a11, a8 -; XTENSA-NEXT: and a11, a3, a9 -; XTENSA-NEXT: slli a11, a11, 8 -; XTENSA-NEXT: slli a7, a3, 24 -; XTENSA-NEXT: or a11, a7, a11 -; XTENSA-NEXT: or a8, a11, a8 -; XTENSA-NEXT: srli a7, a8, 4 +; XTENSA-NEXT: and a8, a8, a9 +; XTENSA-NEXT: extui a10, a3, 24, 8 +; XTENSA-NEXT: or a8, a8, a10 +; XTENSA-NEXT: and a10, a3, a9 +; XTENSA-NEXT: slli a10, a10, 8 +; XTENSA-NEXT: slli a11, a3, 24 +; XTENSA-NEXT: or a10, a11, a10 +; XTENSA-NEXT: or a8, a10, a8 +; XTENSA-NEXT: srli a10, a8, 4 ; XTENSA-NEXT: l32r a11, .LCPI6_1 -; XTENSA-NEXT: and a7, a7, a11 +; XTENSA-NEXT: and a10, a10, a11 ; XTENSA-NEXT: and a8, a8, a11 ; XTENSA-NEXT: slli a8, a8, 4 -; XTENSA-NEXT: or a8, a7, a8 -; XTENSA-NEXT: srli a7, a8, 2 -; XTENSA-NEXT: l32r a6, .LCPI6_2 -; XTENSA-NEXT: and a7, a7, a6 -; XTENSA-NEXT: and a8, a8, a6 +; XTENSA-NEXT: or a8, a10, a8 +; XTENSA-NEXT: srli a10, a8, 2 +; XTENSA-NEXT: l32r a7, .LCPI6_2 +; XTENSA-NEXT: and a10, a10, a7 +; XTENSA-NEXT: and a8, a8, a7 ; XTENSA-NEXT: slli a8, a8, 2 -; XTENSA-NEXT: or a8, a7, a8 -; XTENSA-NEXT: srli a7, a8, 1 -; XTENSA-NEXT: l32r a5, .LCPI6_3 -; XTENSA-NEXT: and a7, a7, a5 -; XTENSA-NEXT: and a8, a8, a5 +; XTENSA-NEXT: or a8, a10, a8 +; XTENSA-NEXT: srli a10, a8, 1 +; XTENSA-NEXT: l32r a6, .LCPI6_3 +; XTENSA-NEXT: and a10, a10, a6 +; XTENSA-NEXT: and a8, a8, a6 ; XTENSA-NEXT: slli a8, a8, 1 -; XTENSA-NEXT: or a8, a7, a8 -; XTENSA-NEXT: ssr a10 -; XTENSA-NEXT: srl a10, a2 -; XTENSA-NEXT: srli a7, a2, 8 -; XTENSA-NEXT: and a7, a7, a9 -; XTENSA-NEXT: or a10, a7, a10 +; XTENSA-NEXT: or a8, a10, a8 +; XTENSA-NEXT: srli a10, a2, 8 +; XTENSA-NEXT: and a10, a10, a9 +; XTENSA-NEXT: extui a5, a2, 24, 8 +; XTENSA-NEXT: or a10, a10, a5 ; XTENSA-NEXT: and a9, a2, a9 ; XTENSA-NEXT: slli a9, a9, 8 -; XTENSA-NEXT: slli a7, a2, 24 -; XTENSA-NEXT: or a9, a7, a9 +; XTENSA-NEXT: slli a5, a2, 24 +; XTENSA-NEXT: or a9, a5, a9 
; XTENSA-NEXT: or a9, a9, a10 ; XTENSA-NEXT: srli a10, a9, 4 ; XTENSA-NEXT: and a10, a10, a11 @@ -212,13 +202,13 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind { ; XTENSA-NEXT: slli a9, a9, 4 ; XTENSA-NEXT: or a9, a10, a9 ; XTENSA-NEXT: srli a10, a9, 2 -; XTENSA-NEXT: and a10, a10, a6 -; XTENSA-NEXT: and a9, a9, a6 +; XTENSA-NEXT: and a10, a10, a7 +; XTENSA-NEXT: and a9, a9, a7 ; XTENSA-NEXT: slli a9, a9, 2 ; XTENSA-NEXT: or a9, a10, a9 ; XTENSA-NEXT: srli a10, a9, 1 -; XTENSA-NEXT: and a10, a10, a5 -; XTENSA-NEXT: and a9, a9, a5 +; XTENSA-NEXT: and a10, a10, a6 +; XTENSA-NEXT: and a9, a9, a6 ; XTENSA-NEXT: slli a9, a9, 1 ; XTENSA-NEXT: or a3, a10, a9 ; XTENSA-NEXT: or a2, a8, a8 diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll index 5494e5568906e..f58bed19d4ee7 100644 --- a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll @@ -29,11 +29,11 @@ define i32 @test_cttz_i32(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI0_2 ; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: slli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: slli a9, a8, 16 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a8, a8, 0, 6 +; XTENSA-NEXT: extui a8, a8, 24, 8 ; XTENSA-NEXT: .LBB0_2: # %cond.end ; XTENSA-NEXT: or a2, a8, a8 ; XTENSA-NEXT: ret @@ -60,11 +60,11 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI1_2 ; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: slli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: slli a9, a8, 16 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a2, a8, 0, 6 +; XTENSA-NEXT: extui a2, a8, 24, 8 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true) ret i32 %tmp @@ -85,9 +85,7 @@ define i32 @test_ctlz_i32(i32 
%a) nounwind { ; XTENSA-NEXT: or a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 8 ; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: movi a9, 16 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a9, a8 +; XTENSA-NEXT: extui a9, a8, 16, 16 ; XTENSA-NEXT: or a8, a8, a9 ; XTENSA-NEXT: movi a9, -1 ; XTENSA-NEXT: xor a8, a8, a9 @@ -104,11 +102,11 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI2_2 ; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: slli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: slli a9, a8, 16 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a2, a8, 0, 6 +; XTENSA-NEXT: extui a2, a8, 24, 8 ; XTENSA-NEXT: .LBB2_2: # %cond.end ; XTENSA-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false) @@ -125,9 +123,7 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-NEXT: or a8, a8, a9 ; XTENSA-NEXT: srli a9, a8, 8 ; XTENSA-NEXT: or a8, a8, a9 -; XTENSA-NEXT: movi a9, 16 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a9, a8 +; XTENSA-NEXT: extui a9, a8, 16, 16 ; XTENSA-NEXT: or a8, a8, a9 ; XTENSA-NEXT: movi a9, -1 ; XTENSA-NEXT: xor a8, a8, a9 @@ -144,11 +140,11 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI3_2 ; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: slli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 +; XTENSA-NEXT: slli a9, a8, 16 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a2, a8, 0, 6 +; XTENSA-NEXT: extui a2, a8, 24, 8 ; XTENSA-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 true) ret i32 %tmp @@ -169,11 +165,11 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; XTENSA-NEXT: add a8, a8, a9 ; XTENSA-NEXT: l32r a9, .LCPI4_2 ; XTENSA-NEXT: and a8, a8, a9 -; XTENSA-NEXT: srli a9, a8, 8 +; XTENSA-NEXT: slli a9, a8, 8 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a9, a8, 16, 5 +; 
XTENSA-NEXT: slli a9, a8, 16 ; XTENSA-NEXT: add a8, a8, a9 -; XTENSA-NEXT: extui a2, a8, 0, 6 +; XTENSA-NEXT: extui a2, a8, 24, 8 ; XTENSA-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 diff --git a/llvm/test/CodeGen/Xtensa/div.ll b/llvm/test/CodeGen/Xtensa/div.ll index 883178acdf69a..e10e976fb1b38 100644 --- a/llvm/test/CodeGen/Xtensa/div.ll +++ b/llvm/test/CodeGen/Xtensa/div.ll @@ -451,9 +451,7 @@ define i16 @sdiv16_constant_lhs(i16 %a) nounwind { define i32 @sdiv_pow2(i32 %a) nounwind { ; XTENSA-LABEL: sdiv_pow2: ; XTENSA: srai a8, a2, 31 -; XTENSA-NEXT: movi a9, 29 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a8, a8 +; XTENSA-NEXT: extui a8, a8, 29, 3 ; XTENSA-NEXT: add a8, a2, a8 ; XTENSA-NEXT: srai a2, a8, 3 ; XTENSA-NEXT: ret @@ -464,9 +462,7 @@ define i32 @sdiv_pow2(i32 %a) nounwind { define i32 @sdiv_pow2_2(i32 %a) nounwind { ; XTENSA-LABEL: sdiv_pow2_2: ; XTENSA: srai a8, a2, 31 -; XTENSA-NEXT: movi a9, 16 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a8, a8 +; XTENSA-NEXT: extui a8, a8, 16, 16 ; XTENSA-NEXT: add a8, a2, a8 ; XTENSA-NEXT: srai a2, a8, 16 ; XTENSA-NEXT: ret @@ -478,9 +474,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind { ; XTENSA-LABEL: sdiv16_pow2: ; XTENSA: slli a8, a2, 16 ; XTENSA-NEXT: srai a8, a8, 16 -; XTENSA-NEXT: movi a9, 28 -; XTENSA-NEXT: ssr a9 -; XTENSA-NEXT: srl a8, a8 +; XTENSA-NEXT: extui a8, a8, 28, 4 ; XTENSA-NEXT: movi a9, 7 ; XTENSA-NEXT: and a8, a8, a9 ; XTENSA-NEXT: add a8, a2, a8 From b5a9f2c62e84594d0bc3a3eead5b652525a2c6a9 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Mon, 12 Aug 2024 21:03:32 +0300 Subject: [PATCH 09/11] [Xtensa] Fix shift tests. 
--- llvm/test/CodeGen/Xtensa/shift.ll | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/Xtensa/shift.ll b/llvm/test/CodeGen/Xtensa/shift.ll index 2b92b8032b626..729b66b12ab20 100644 --- a/llvm/test/CodeGen/Xtensa/shift.ll +++ b/llvm/test/CodeGen/Xtensa/shift.ll @@ -44,7 +44,7 @@ define i32 @lshr(i32 %x, i32 %y) nounwind { ret i32 %c } -define i32 @lshr_imm_1(i32 %x, i32 %y) nounwind { +define i32 @lshr_imm_1(i32 %x) nounwind { ; CHECK-LABEL: lshr_imm_1: ; CHECK: srli a2, a2, 1 ; CHECK-NEXT: ret @@ -52,7 +52,7 @@ define i32 @lshr_imm_1(i32 %x, i32 %y) nounwind { ret i32 %c } -define i32 @lshr_imm_15(i32 %x, i32 %y) nounwind { +define i32 @lshr_imm_15(i32 %x) nounwind { ; CHECK-LABEL: lshr_imm_15: ; CHECK: srli a2, a2, 15 ; CHECK-NEXT: ret @@ -69,7 +69,7 @@ define i32 @ashr(i32 %x, i32 %y) nounwind { ret i32 %c } -define i32 @ashr_imm_1(i32 %x, i32 %y) nounwind { +define i32 @ashr_imm_1(i32 %x) nounwind { ; CHECK-LABEL: ashr_imm_1: ; CHECK: srai a2, a2, 1 ; CHECK-NEXT: ret @@ -77,7 +77,7 @@ define i32 @ashr_imm_1(i32 %x, i32 %y) nounwind { ret i32 %c } -define i32 @ashr_imm_10(i32 %x, i32 %y) nounwind { +define i32 @ashr_imm_10(i32 %x) nounwind { ; CHECK-LABEL: ashr_imm_10: ; CHECK: srai a2, a2, 10 ; CHECK-NEXT: ret @@ -85,7 +85,7 @@ define i32 @ashr_imm_10(i32 %x, i32 %y) nounwind { ret i32 %c } -define i32 @ashr_imm_31(i32 %x, i32 %y) nounwind { +define i32 @ashr_imm_31(i32 %x) nounwind { ; CHECK-LABEL: ashr_imm_31: ; CHECK: srai a2, a2, 31 ; CHECK-NEXT: ret From 57a942ece3c8271057be680982e693863abf7cbc Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Wed, 14 Aug 2024 22:53:33 +0300 Subject: [PATCH 10/11] [Xtensa] Fix SHL instruction shift and add SRL test. 
--- llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp | 2 +- llvm/test/CodeGen/Xtensa/shift.ll | 32 ++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp index 06a04ace59b0d..0e5580020fd92 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp @@ -140,7 +140,7 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { auto *C = dyn_cast<ConstantSDNode>(N1); // If C is constant in range [1..31] then we can generate SLLI // instruction using pattern matching, otherwise generate SLL - if (!C || !(isUInt<5>(C->getZExtValue()) && !C->isZero())) { + if (!C || C->isZero()) { SDNode *SSL = CurDAG->getMachineNode(Xtensa::SSL, DL, MVT::Glue, N1); SDNode *SLL = CurDAG->getMachineNode(Xtensa::SLL, DL, VT, N0, SDValue(SSL, 0)); diff --git a/llvm/test/CodeGen/Xtensa/shift.ll b/llvm/test/CodeGen/Xtensa/shift.ll index 729b66b12ab20..87e847f65d837 100644 --- a/llvm/test/CodeGen/Xtensa/shift.ll +++ b/llvm/test/CodeGen/Xtensa/shift.ll @@ -60,6 +60,14 @@ define i32 @lshr_imm_15(i32 %x) nounwind { ret i32 %c } +define i32 @lshr_imm_20(i32 %x) nounwind { +; CHECK-LABEL: lshr_imm_20: +; CHECK: extui a2, a2, 20, 12 +; CHECK-NEXT: ret + %c = lshr i32 %x, 20 + ret i32 %c +} + define i32 @ashr(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ashr: ; CHECK: ssr a3 @@ -101,16 +109,16 @@ define i64 @lshl_64(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: ssl a8 ; CHECK-NEXT: sll a10, a2 ; CHECK-NEXT: movi a9, 0 -; CHECK-NEXT: blt a8, a9, .LBB11_2 +; CHECK-NEXT: blt a8, a9, .LBB12_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: or a3, a10, a10 -; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: ssl a4 ; CHECK-NEXT: sll a2, a2 -; CHECK-NEXT: blt a8, a9, .LBB11_4 +; CHECK-NEXT: blt a8, a9, .LBB12_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: or a2, a9, a9 -; CHECK-NEXT: .LBB11_4: +; CHECK-NEXT: .LBB12_4: ; CHECK-NEXT: ret %c = shl i64 %x, %y ret i64 %c @@ -124,16 
+132,16 @@ define i64 @lshr_64(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: ssr a8 ; CHECK-NEXT: srl a10, a3 ; CHECK-NEXT: movi a9, 0 -; CHECK-NEXT: blt a8, a9, .LBB12_2 +; CHECK-NEXT: blt a8, a9, .LBB13_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: or a2, a10, a10 -; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: .LBB13_2: ; CHECK-NEXT: ssr a4 ; CHECK-NEXT: srl a3, a3 -; CHECK-NEXT: blt a8, a9, .LBB12_4 +; CHECK-NEXT: blt a8, a9, .LBB13_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: or a3, a9, a9 -; CHECK-NEXT: .LBB12_4: +; CHECK-NEXT: .LBB13_4: ; CHECK-NEXT: ret %c = lshr i64 %x, %y ret i64 %c @@ -147,16 +155,16 @@ define i64 @ashr_64(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: ssr a9 ; CHECK-NEXT: sra a8, a3 ; CHECK-NEXT: movi a10, 0 -; CHECK-NEXT: blt a9, a10, .LBB13_2 +; CHECK-NEXT: blt a9, a10, .LBB14_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: or a2, a8, a8 -; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: .LBB14_2: ; CHECK-NEXT: ssr a4 ; CHECK-NEXT: sra a8, a3 -; CHECK-NEXT: blt a9, a10, .LBB13_4 +; CHECK-NEXT: blt a9, a10, .LBB14_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: srai a8, a3, 31 -; CHECK-NEXT: .LBB13_4: +; CHECK-NEXT: .LBB14_4: ; CHECK-NEXT: or a3, a8, a8 ; CHECK-NEXT: ret %c = ashr i64 %x, %y From cf1c0c14579231a5a68f3d706cc898aa0fd5e136 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Sun, 18 Aug 2024 23:53:48 +0300 Subject: [PATCH 11/11] [Xtensa] Minor fixes in comments. --- llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp index 0e5580020fd92..6f6d3342fcd7f 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp @@ -67,7 +67,7 @@ class XtensaDAGToDAGISel : public SelectionDAGISel { Valid = isValidAddrOffset(Scale, OffsetVal); if (Valid) { - // If the first operand is a FI, get the TargetFI Node + // If the first operand is a FI, get the TargetFI Node. 
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); @@ -139,7 +139,7 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); auto *C = dyn_cast<ConstantSDNode>(N1); // If C is constant in range [1..31] then we can generate SLLI - // instruction using pattern matching, otherwise generate SLL + // instruction using pattern matching, otherwise generate SLL. if (!C || C->isZero()) { SDNode *SSL = CurDAG->getMachineNode(Xtensa::SSL, DL, MVT::Glue, N1); SDNode *SLL = @@ -155,7 +155,7 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { auto *C = dyn_cast<ConstantSDNode>(N1); // If C is constant then we can generate SRLI - // instruction using pattern matching or EXTUI, otherwise generate SRL + // instruction using pattern matching or EXTUI, otherwise generate SRL. if (C) { if (isUInt<4>(C->getZExtValue())) break; @@ -178,7 +178,7 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); auto *C = dyn_cast<ConstantSDNode>(N1); // If C is constant then we can generate SRAI - // instruction using pattern matching, otherwise generate SRA + // instruction using pattern matching, otherwise generate SRA. if (!C) { SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1); SDNode *SRA =