Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Xtensa] Implement lowering Mul/Div/Shift operations. #99981

Merged
merged 11 commits into from
Aug 22, 2024
80 changes: 79 additions & 1 deletion llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class XtensaDAGToDAGISel : public SelectionDAGISel {
Valid = isValidAddrOffset(Scale, OffsetVal);

if (Valid) {
// If the first operand is a FI, get the TargetFI Node
// If the first operand is a FI, get the TargetFI Node.
if (FrameIndexSDNode *FIN =
dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
Expand Down Expand Up @@ -125,12 +125,90 @@ FunctionPass *llvm::createXtensaISelDag(XtensaTargetMachine &TM,

void XtensaDAGToDAGISel::Select(SDNode *Node) {
SDLoc DL(Node);
EVT VT = Node->getValueType(0);

// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
Node->setNodeId(-1);
return;
}

switch (Node->getOpcode()) {
case ISD::SHL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
auto *C = dyn_cast<ConstantSDNode>(N1);
// If C is constant in range [1..31] then we can generate SLLI
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The constant should always be in this range.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make it consistent with SRA.

Copy link
Contributor Author

@andreisfr andreisfr Aug 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. I left the check for a zero shift amount in place (it may still appear in some hypothetical situation).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment still needs fixing.

NB
Sentences in comments should end with a period, here and elsewhere. Sorry for having to remind you.

// instruction using pattern matching, otherwise generate SLL.
if (!C || C->isZero()) {
SDNode *SSL = CurDAG->getMachineNode(Xtensa::SSL, DL, MVT::Glue, N1);
SDNode *SLL =
CurDAG->getMachineNode(Xtensa::SLL, DL, VT, N0, SDValue(SSL, 0));
ReplaceNode(Node, SLL);
return;
}
break;
}
// Logical shift right. N0 is the value being shifted, N1 is the shift amount.
case ISD::SRL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
auto *C = dyn_cast<ConstantSDNode>(N1);

// Three ways to select this node, depending on the shift amount:
//  * a constant that fits in 4 unsigned bits (0..15) is left to the
//    generated pattern matcher (SelectCode after the switch), which is
//    expected to produce SRLI;
//  * any other constant is emitted here as EXTUI;
//  * a non-constant amount is emitted here as SSR followed by SRL.
if (C) {
if (isUInt<4>(C->getZExtValue()))
break;
unsigned ShAmt = C->getZExtValue();
// EXTUI receives ShAmt and (32 - ShAmt) as immediates; presumably these are
// the start bit and the width of the extracted field - confirm against the
// instruction definition.
SDNode *EXTUI = CurDAG->getMachineNode(
Xtensa::EXTUI, DL, VT, N0, CurDAG->getTargetConstant(ShAmt, DL, VT),
CurDAG->getTargetConstant(32 - ShAmt, DL, VT));
ReplaceNode(Node, EXTUI);
return;
}

// SSR consumes the shift amount and yields a Glue result, which is passed as
// the final operand of SRL.
SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1);
SDNode *SRL =
CurDAG->getMachineNode(Xtensa::SRL, DL, VT, N0, SDValue(SSR, 0));
ReplaceNode(Node, SRL);
return;
}
// Arithmetic shift right. N0 is the value being shifted, N1 is the shift
// amount.
case ISD::SRA: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
auto *C = dyn_cast<ConstantSDNode>(N1);
// If the shift amount is a constant, fall out of the switch so that the
// generated pattern matcher can select SRAI. Otherwise emit SSR on the shift
// amount and feed its Glue result into SRA.
if (!C) {
SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1);
SDNode *SRA =
CurDAG->getMachineNode(Xtensa::SRA, DL, VT, N0, SDValue(SSR, 0));
ReplaceNode(Node, SRA);
return;
}
break;
}
// Target-specific SRCL node with three operands. LowerShiftLeftParts builds it
// as (Hi, Lo, Shamt).
case XtensaISD::SRCL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
SDValue N2 = Node->getOperand(2);
// SSL consumes the third operand and yields a Glue result, which is passed
// as the final operand of SRC alongside the first two operands.
SDNode *SSL = CurDAG->getMachineNode(Xtensa::SSL, DL, MVT::Glue, N2);
SDNode *SRC =
CurDAG->getMachineNode(Xtensa::SRC, DL, VT, N0, N1, SDValue(SSL, 0));
ReplaceNode(Node, SRC);
return;
}
case XtensaISD::SRCR: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
SDValue N2 = Node->getOperand(2);
SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N2);
SDNode *SRC =
CurDAG->getMachineNode(Xtensa::SRC, DL, VT, N0, N1, SDValue(SSR, 0));
Comment on lines +205 to +207
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems surprising to me to handle this as a function of selection, and not legalization but it's fine

ReplaceNode(Node, SRC);
return;
}
}

SelectCode(Node);
}
172 changes: 167 additions & 5 deletions llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
Expand Down Expand Up @@ -98,6 +99,32 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
setCondCodeAction(ISD::SETUGT, MVT::i32, Expand);
setCondCodeAction(ISD::SETULE, MVT::i32, Expand);

setOperationAction(ISD::MUL, MVT::i32, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);

setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
s-barannikov marked this conversation as resolved.
Show resolved Hide resolved
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTR, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);

// Implement custom stack allocations
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
// Implement custom stack save and restore
Expand Down Expand Up @@ -629,8 +656,12 @@ SDValue XtensaTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Table.getValueType();
unsigned EntrySize = MJTI->getEntrySize(TD);

Index = DAG.getNode(ISD::MUL, DL, Index.getValueType(), Index,
DAG.getConstant(EntrySize, DL, Index.getValueType()));
assert((MJTI->getEntrySize(TD) == 4) && "Unsupported jump-table entry size");

Index = DAG.getNode(
ISD::SHL, DL, Index.getValueType(), Index,
DAG.getConstant(Log2_32(EntrySize), DL, Index.getValueType()));

SDValue Addr = DAG.getNode(ISD::ADD, DL, Index.getValueType(), Index, Table);
SDValue LD =
DAG.getLoad(PtrVT, DL, Chain, Addr,
Expand Down Expand Up @@ -662,10 +693,12 @@ SDValue XtensaTargetLowering::getAddrPCRel(SDValue Op,
return DAG.getNode(XtensaISD::PCREL_WRAPPER, DL, Ty, Op);
}

SDValue XtensaTargetLowering::LowerConstantPool(ConstantPoolSDNode *CP,
SDValue XtensaTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Result;

if (!CP->isMachineConstantPoolEntry()) {
Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
CP->getOffset());
Expand Down Expand Up @@ -713,6 +746,119 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops, DL);
}

/// Custom lowering for a left shift of a value held as two i32 parts.
///
/// Operand 0 is the low part, operand 1 the high part and operand 2 the shift
/// amount. The result is the shifted {Lo, Hi} pair returned as merged values.
SDValue XtensaTargetLowering::LowerShiftLeftParts(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = MVT::i32;
  SDValue InLo = Op.getOperand(0);
  SDValue InHi = Op.getOperand(1);
  SDValue Amount = Op.getOperand(2);

  // Expansion, with a register size of 32:
  //
  //   if Amount - 32 < 0:            // Amount < 32
  //     Lo = InLo << Amount
  //     Hi = (InHi << Amount) | (InLo >>u (32 - Amount))
  //   else:
  //     Lo = 0
  //     Hi = InLo << (Amount - 32)

  // Amount - 32, expressed as an ADD of the constant -32.
  SDValue Minus32 = DAG.getConstant(-32, DL, VT);
  SDValue AmountMinus32 = DAG.getNode(ISD::ADD, DL, VT, Amount, Minus32);

  // Results for the "Amount < 32" branch. The combined high part is produced
  // by the target-specific SRCL node.
  SDValue LoIfSmall = DAG.getNode(ISD::SHL, DL, VT, InLo, Amount);
  SDValue HiIfSmall = DAG.getNode(XtensaISD::SRCL, DL, VT, InHi, InLo, Amount);

  // Results for the "Amount >= 32" branch: Lo becomes zero.
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue HiIfLarge = DAG.getNode(ISD::SHL, DL, VT, InLo, AmountMinus32);

  // Pick between the two branches with a signed "Amount - 32 < 0" test.
  SDValue IsSmall = DAG.getSetCC(DL, VT, AmountMinus32, Zero, ISD::SETLT);
  SDValue OutLo = DAG.getNode(ISD::SELECT, DL, VT, IsSmall, LoIfSmall, Zero);
  SDValue OutHi =
      DAG.getNode(ISD::SELECT, DL, VT, IsSmall, HiIfSmall, HiIfLarge);

  return DAG.getMergeValues({OutLo, OutHi}, DL);
}
s-barannikov marked this conversation as resolved.
Show resolved Hide resolved

/// Custom lowering for a right shift of a value held as two i32 parts.
///
/// Operand 0 is the low part, operand 1 the high part and operand 2 the shift
/// amount. \p IsSRA selects an arithmetic (ISD::SRA) rather than a logical
/// (ISD::SRL) shift. The result is the shifted {Lo, Hi} pair returned as
/// merged values.
SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   bool IsSRA) const {
  SDLoc DL(Op);
  SDValue InLo = Op.getOperand(0);
  SDValue InHi = Op.getOperand(1);
  SDValue Amount = Op.getOperand(2);
  MVT VT = MVT::i32;

  // Expansion, with a register size of 32 and ">>" standing for ">>s" when
  // IsSRA is set and ">>u" otherwise:
  //
  //   if Amount - 32 < 0:            // Amount < 32
  //     Lo = (InLo >>u Amount) | (InHi << (32 - Amount))
  //     Hi = InHi >> Amount
  //   else:
  //     Lo = InHi >> (Amount - 32)
  //     Hi = IsSRA ? InHi >>s 31 : 0

  const unsigned ShiftOpc = IsSRA ? ISD::SRA : ISD::SRL;

  // Amount - 32, expressed as an ADD of the constant -32.
  SDValue Minus32 = DAG.getConstant(-32, DL, VT);
  SDValue ThirtyOne = DAG.getConstant(32 - 1, DL, VT);
  SDValue AmountMinus32 = DAG.getNode(ISD::ADD, DL, VT, Amount, Minus32);

  // Results for the "Amount < 32" branch. The combined low part is produced
  // by the target-specific SRCR node.
  SDValue LoIfSmall = DAG.getNode(XtensaISD::SRCR, DL, VT, InHi, InLo, Amount);
  SDValue HiIfSmall = DAG.getNode(ShiftOpc, DL, VT, InHi, Amount);

  // Results for the "Amount >= 32" branch.
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue LoIfLarge = DAG.getNode(ShiftOpc, DL, VT, InHi, AmountMinus32);
  SDValue HiIfLarge =
      IsSRA ? DAG.getNode(ShiftOpc, DL, VT, InHi, ThirtyOne) : Zero;

  // Pick between the two branches with a signed "Amount - 32 < 0" test.
  SDValue IsSmall = DAG.getSetCC(DL, VT, AmountMinus32, Zero, ISD::SETLT);
  SDValue OutLo =
      DAG.getNode(ISD::SELECT, DL, VT, IsSmall, LoIfSmall, LoIfLarge);
  SDValue OutHi =
      DAG.getNode(ISD::SELECT, DL, VT, IsSmall, HiIfSmall, HiIfLarge);

  return DAG.getMergeValues({OutLo, OutHi}, DL);
}

SDValue XtensaTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
auto &TLI = DAG.getTargetLoweringInfo();
return TLI.expandCTPOP(Op.getNode(), DAG);
Comment on lines +830 to +831
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TLI is this. Also I would expect this custom lowering to just be identical to what you get for expand?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TLI is this. Also I would expect this custom lowering to just be identical to what you get for expand?

The instruction sequence that I proposed before is a bit different from the sequence generated by TLI.expandCTPOP(). That is because I used "right shift and mask" at the last stage and thus needed two EXTUI operations in my implementation. As I understand it, TLI.expandCTPOP() uses "left shift and mask" at the last stage, but the number of instructions in both implementations is the same.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also CTPOP isn't mentioned in the patch description. Should this be done separately?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean to change first patch description from "[Xtensa] Implement lowering Mul/Div/Shift operations." to something like "[Xtensa] Implement lowering Mul/Div/Shift/CTTZ/CTLZ/CTPOP operations."? I can force update patches

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either that, or split the CTPOP changes into a separate PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated patch description

}

/// Decide whether a multiplication by the constant \p C should be decomposed
/// into shifts.
///
/// \p C must be either a constant splat vector or, when \p VT is a scalar
/// integer type, a ConstantSDNode; anything else yields false. Returns true
/// only when the element width is at most 32 bits and the constant is a power
/// of two that also satisfies APInt::isIntN(5).
bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                  SDValue C) const {
  APInt MulImm;
  unsigned ElemBits;

  if (ISD::isConstantSplatVector(C.getNode(), MulImm)) {
    ElemBits = VT.getScalarSizeInBits();
  } else {
    // Not a splat: only a scalar integer multiplied by a plain constant
    // qualifies.
    if (!VT.isScalarInteger())
      return false;
    ElemBits = VT.getSizeInBits();

    auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode());
    if (!ConstNode)
      return false;
    MulImm = ConstNode->getAPIntValue();
  }

  // Elements wider than 32 bits are not handled.
  if (ElemBits > 32)
    return false;

  // Convert MUL by a small power of two into a left shift.
  // NOTE(review): isIntN(5) constrains the multiplier itself, so only
  // 1, 2, 4, 8 and 16 are accepted; if the intent was to bound the shift
  // amount to 5 bits, this should test the log2 instead - confirm.
  return MulImm.isPowerOf2() && MulImm.isIntN(5);
}

SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
Expand All @@ -726,8 +872,10 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
return LowerBlockAddress(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
case ISD::CTPOP:
return LowerCTPOP(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
return LowerConstantPool(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
case ISD::STACKSAVE:
Expand All @@ -736,6 +884,12 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
return LowerSTACKRESTORE(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SHL_PARTS:
return LowerShiftLeftParts(Op, DAG);
case ISD::SRA_PARTS:
return LowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG, false);
default:
report_fatal_error("Unexpected node to lower");
}
Expand All @@ -747,12 +901,18 @@ const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "XtensaISD::BR_JT";
case XtensaISD::CALL:
return "XtensaISD::CALL";
case XtensaISD::EXTUI:
return "XtensaISD::EXTUI";
case XtensaISD::PCREL_WRAPPER:
return "XtensaISD::PCREL_WRAPPER";
case XtensaISD::RET:
return "XtensaISD::RET";
case XtensaISD::SELECT_CC:
return "XtensaISD::SELECT_CC";
case XtensaISD::SRCL:
return "XtensaISD::SRCL";
case XtensaISD::SRCR:
return "XtensaISD::SRCR";
}
return nullptr;
}
Expand Down Expand Up @@ -827,6 +987,8 @@ XtensaTargetLowering::emitSelectCC(MachineInstr &MI,

MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();

switch (MI.getOpcode()) {
case Xtensa::SELECT:
return emitSelectCC(MI, MBB);
Expand Down
Loading