Skip to content

Commit

Permalink
[AMDGPU] Constant fold FMAD_FTZ (#69443)
Browse files Browse the repository at this point in the history
Solves #68315
  • Loading branch information
Pierre-vh authored Oct 19, 2023
1 parent 25002b7 commit 40a426f
Show file tree
Hide file tree
Showing 5 changed files with 572 additions and 970 deletions.
30 changes: 30 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5041,6 +5041,36 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performAssertSZExtCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicWOChainCombine(N, DCI);
case AMDGPUISD::FMAD_FTZ: {
// Constant-fold FMAD_FTZ: when all three operands are floating-point
// constants, compute (N0 * N1) + N2 here and return it as a constant node.
// If any operand is not a constant, nothing is folded and control leaves
// the switch via the break below.
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);

// FMAD_FTZ is a FMAD + flush denormals to zero.
// We flush the inputs, the intermediate step, and the output.
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
if (N0CFP && N1CFP && N2CFP) {
// Flush-to-zero helper: a denormal input is replaced by a zero built in
// the same float semantics, negated when the input is negative so the
// sign is kept. Any non-denormal value is returned unchanged.
const auto FTZ = [](const APFloat &V) {
if (V.isDenormal()) {
APFloat Zero(V.getSemantics(), 0);
return V.isNegative() ? -Zero : Zero;
}
return V;
};

// Flush each input first.
APFloat V0 = FTZ(N0CFP->getValueAPF());
APFloat V1 = FTZ(N1CFP->getValueAPF());
APFloat V2 = FTZ(N2CFP->getValueAPF());
// The multiply and the add are two separately rounded steps (both
// round-to-nearest, ties-to-even), with the product flushed in between.
// The order of these statements determines the folded value.
V0.multiply(V1, APFloat::rmNearestTiesToEven);
V0 = FTZ(V0);
V0.add(V2, APFloat::rmNearestTiesToEven);
// Flush the final sum before materializing it as a constant of type VT.
return DAG.getConstantFP(FTZ(V0), DL, VT);
}
break;
}
}
return SDValue();
}
Expand Down
Loading

0 comments on commit 40a426f

Please sign in to comment.