Skip to content

Commit

Permalink
Merge pull request #3951 from alyssarosenzweig/opt/pops
Browse files Browse the repository at this point in the history
Add a hack for multiple destinations & make good use of it
  • Loading branch information
Sonicadvance1 authored Aug 14, 2024
2 parents 33558e6 + f5bc064 commit aa5d2ff
Show file tree
Hide file tree
Showing 32 changed files with 536 additions and 860 deletions.
39 changes: 27 additions & 12 deletions FEXCore/Scripts/json_ir_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,21 +125,36 @@ def parse_ops(ops):

RHS = EqualSplit[0].strip()
if len(EqualSplit) > 1:
OpDef.HasDest = True
LHS = EqualSplit[0].strip()
RHS = EqualSplit[1].strip()

# Parse the destination, must be one type of SSA, GPR, or FPR
ResultType = EqualSplit[0].strip()
if ResultType == "SSA":
OpDef.DestType = "SSA" # We don't know this type right now
elif ResultType == "GPR":
OpDef.DestType = "GPR"
elif ResultType == "GPRPair":
OpDef.DestType = "GPRPair"
elif ResultType == "FPR":
OpDef.DestType = "FPR"
if ":" in LHS:
# Named destinations. This is a hack, but so is the entire
# multi-destination support bolted onto the old IR...
#
# Named destinations require side effects because they break
# SSA hard. Validate that.
assert("HasSideEffects" in op_val and op_val["HasSideEffects"])

for Dest in LHS.split(","):
Dest = Dest.strip()
DType, Name = Dest.split(":$")

# If the destination appears also as a source, it is
# read-modify-write.
if Dest in RHS:
# Turn RMW into an in/out source
RHS = RHS.replace(Dest.strip(), f"{DType}:$Inout{Name}")
else:
# Turn named destinations into an out source.
RHS += f", {DType}:$Out{Name}"
else:
ExitError("Unknown destination class type {}. Needs to be one of {SSA, GPR, GPRPair, FPR}".format(ResultType))
# Single anonymous destination
if LHS not in ["SSA", "GPR", "GPRPair", "FPR"]:
ExitError(f"Unknown destination class type {LHS}. Needs to be one of SSA, GPR, GPRPair, FPR")

OpDef.HasDest = True
OpDef.DestType = LHS

# IR Op needs to start with a name
RHS = RHS.split(" ", 1)
Expand Down
26 changes: 2 additions & 24 deletions FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,6 @@ DEF_BINOP_WITH_CONSTANT(Lshl, lslv, lsl)
DEF_BINOP_WITH_CONSTANT(Lshr, lsrv, lsr)
DEF_BINOP_WITH_CONSTANT(Ror, rorv, ror)

// Copies a register pair into the destination pair using 32-bit moves.
// Only an op size of 4 is handled; any other size is reported through
// LOGMAN_MSG_A_FMT and no code is emitted for it.
DEF_OP(TruncElementPair) {
  auto Op = IROp->C<IR::IROp_TruncElementPair>();

  if (IROp->Size == 4) {
    // Resolve the destination pair before the source pair, then move each
    // half across as a 32-bit value (first element, then second).
    auto Dst = GetRegPair(Node);
    auto Src = GetRegPair(Op->Pair.ID());
    mov(ARMEmitter::Size::i32Bit, Dst.first, Src.first);
    mov(ARMEmitter::Size::i32Bit, Dst.second, Src.second);
  } else {
    LOGMAN_MSG_A_FMT("Unhandled Truncation size: {}", IROp->Size);
  }
}

DEF_OP(Constant) {
auto Op = IROp->C<IR::IROp_Constant>();
auto Dst = GetReg(Node);
Expand Down Expand Up @@ -232,10 +217,8 @@ DEF_OP(CmpPairZ) {
mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

// Compare, setting Z and clobbering NzCV
const auto Src1 = GetRegPair(Op->Src1.ID());
const auto Src2 = GetRegPair(Op->Src2.ID());
cmp(EmitSize, Src1.first, Src2.first);
ccmp(EmitSize, Src1.second, Src2.second, ARMEmitter::StatusFlags::None, ARMEmitter::Condition::CC_EQ);
cmp(EmitSize, GetReg(Op->Src1Lo.ID()), GetReg(Op->Src2Lo.ID()));
ccmp(EmitSize, GetReg(Op->Src1Hi.ID()), GetReg(Op->Src2Hi.ID()), ARMEmitter::StatusFlags::None, ARMEmitter::Condition::CC_EQ);

// Restore NzCV
if (CTX->HostFeatures.SupportsFlagM) {
Expand Down Expand Up @@ -1373,11 +1356,6 @@ DEF_OP(Select) {
const auto Src2 = GetReg(Op->Cmp2.ID());
cmp(CompareEmitSize, Src1, Src2);
}
} else if (IsGPRPair(Op->Cmp1.ID())) {
const auto Src1 = GetRegPair(Op->Cmp1.ID());
const auto Src2 = GetRegPair(Op->Cmp2.ID());
cmp(EmitSize, Src1.first, Src2.first);
ccmp(EmitSize, Src1.second, Src2.second, ARMEmitter::StatusFlags::None, cc);
} else if (IsFPR(Op->Cmp1.ID())) {
const auto Src1 = GetVReg(Op->Cmp1.ID());
const auto Src2 = GetVReg(Op->Cmp2.ID());
Expand Down
54 changes: 39 additions & 15 deletions FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,43 @@ DEF_OP(CASPair) {
auto Op = IROp->C<IR::IROp_CASPair>();
LOGMAN_THROW_AA_FMT(IROp->ElementSize == 4 || IROp->ElementSize == 8, "Wrong element size");
// Size is the size of each pair element
auto Dst = GetRegPair(Node);
auto Expected = GetRegPair(Op->Expected.ID());
auto Desired = GetRegPair(Op->Desired.ID());
auto Dst0 = GetReg(Op->OutLo.ID());
auto Dst1 = GetReg(Op->OutHi.ID());
auto Expected0 = GetReg(Op->ExpectedLo.ID());
auto Expected1 = GetReg(Op->ExpectedHi.ID());
auto Desired0 = GetReg(Op->DesiredLo.ID());
auto Desired1 = GetReg(Op->DesiredHi.ID());
auto MemSrc = GetReg(Op->Addr.ID());

const auto EmitSize = IROp->ElementSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
if (CTX->HostFeatures.SupportsAtomics) {
mov(EmitSize, TMP3, Expected.first);
mov(EmitSize, TMP4, Expected.second);
// RA has heuristics to try to pair sources, but we need to handle the cases
// where they fail. We do so by moving to temporaries. Note we use 64-bit
// moves here even for 32-bit cmpxchg, for the Firestorm register renamer.
if (Desired1.Idx() != (Desired0.Idx() + 1) || Desired0.Idx() & 1) {
mov(ARMEmitter::Size::i64Bit, TMP1, Desired0);
mov(ARMEmitter::Size::i64Bit, TMP2, Desired1);
Desired0 = TMP1;
Desired1 = TMP2;
}

auto CaspalDst0 = Dst0;
auto CaspalDst1 = Dst1;
if (CaspalDst1.Idx() != (CaspalDst0.Idx() + 1) || CaspalDst0.Idx() & 1) {
CaspalDst0 = TMP3;
CaspalDst1 = TMP4;
}

caspal(EmitSize, TMP3, TMP4, Desired.first, Desired.second, MemSrc);
mov(EmitSize, Dst.first, TMP3.R());
mov(EmitSize, Dst.second, TMP4.R());
// We can't clobber the source, these moves are inherently required due to
// ISA limitations. But by making them 64-bit, Firestorm can rename.
mov(ARMEmitter::Size::i64Bit, CaspalDst0, Expected0);
mov(ARMEmitter::Size::i64Bit, CaspalDst1, Expected1);
caspal(EmitSize, CaspalDst0, CaspalDst1, Desired0, Desired1, MemSrc);

if (CaspalDst0 != Dst0) {
mov(ARMEmitter::Size::i64Bit, Dst0, CaspalDst0);
mov(ARMEmitter::Size::i64Bit, Dst1, CaspalDst1);
}
} else {
// Save NZCV so we don't have to mark this op as clobbering NZCV (the
// SupportsAtomics path does not clobber NZCV and this !SupportsAtomics path
Expand All @@ -43,19 +67,19 @@ DEF_OP(CASPair) {

// This instruction sequence must be synced with HandleCASPAL_Armv8.
ldaxp(EmitSize, TMP2, TMP3, MemSrc);
cmp(EmitSize, TMP2, Expected.first);
ccmp(EmitSize, TMP3, Expected.second, ARMEmitter::StatusFlags::None, ARMEmitter::Condition::CC_EQ);
cmp(EmitSize, TMP2, Expected0);
ccmp(EmitSize, TMP3, Expected1, ARMEmitter::StatusFlags::None, ARMEmitter::Condition::CC_EQ);
b(ARMEmitter::Condition::CC_NE, &LoopNotExpected);
stlxp(EmitSize, TMP2, Desired.first, Desired.second, MemSrc);
stlxp(EmitSize, TMP2, Desired0, Desired1, MemSrc);
cbnz(EmitSize, TMP2, &LoopTop);
mov(EmitSize, Dst.first, Expected.first);
mov(EmitSize, Dst.second, Expected.second);
mov(EmitSize, Dst0, Expected0);
mov(EmitSize, Dst1, Expected1);

b(&LoopExpected);

Bind(&LoopNotExpected);
mov(EmitSize, Dst.first, TMP2.R());
mov(EmitSize, Dst.second, TMP3.R());
mov(EmitSize, Dst0, TMP2.R());
mov(EmitSize, Dst1, TMP3.R());
// exclusive monitor needs to be cleared here
// Might have hit the case where ldaxr was hit but stlxr wasn't
clrex();
Expand Down
17 changes: 8 additions & 9 deletions FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,10 +427,11 @@ DEF_OP(CPUID) {
PopDynamicRegsAndLR();

// Results are in x0, x1
// Results want to be in a i64v2 vector
auto Dst = GetRegPair(Node);
mov(ARMEmitter::Size::i64Bit, Dst.first, TMP1);
mov(ARMEmitter::Size::i64Bit, Dst.second, TMP2);
// Results want to be 4xi32 scalars
mov(ARMEmitter::Size::i32Bit, GetReg(Op->OutEAX.ID()), TMP1);
mov(ARMEmitter::Size::i32Bit, GetReg(Op->OutECX.ID()), TMP2);
ubfx(ARMEmitter::Size::i64Bit, GetReg(Op->OutEBX.ID()), TMP1, 32, 32);
ubfx(ARMEmitter::Size::i64Bit, GetReg(Op->OutEDX.ID()), TMP2, 32, 32);
}

DEF_OP(XGetBV) {
Expand Down Expand Up @@ -459,11 +460,9 @@ DEF_OP(XGetBV) {

PopDynamicRegsAndLR();

// Results are in x0
// Results want to be in a i32v2 vector
auto Dst = GetRegPair(Node);
mov(ARMEmitter::Size::i32Bit, Dst.first, TMP1);
lsr(ARMEmitter::Size::i64Bit, Dst.second, TMP1, 32);
// Results are in x0, need to split into i32 parts
mov(ARMEmitter::Size::i32Bit, GetReg(Op->OutEAX.ID()), TMP1);
ubfx(ARMEmitter::Size::i64Bit, GetReg(Op->OutEDX.ID()), TMP1, 32, 32);
}

#undef DEF_OP
Expand Down
6 changes: 0 additions & 6 deletions FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,12 +654,6 @@ bool Arm64JITCore::IsGPR(IR::NodeID Node) const {
return Class == IR::GPRClass || Class == IR::GPRFixedClass;
}

// Reports whether the register class assigned to Node is the GPR pair class.
bool Arm64JITCore::IsGPRPair(IR::NodeID Node) const {
  return GetRegClass(Node) == IR::GPRPairClass;
}

CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, const FEXCore::IR::IRListView* IR, FEXCore::Core::DebugData* DebugData,
const FEXCore::IR::RegisterAllocationData* RAData) {
FEXCORE_PROFILE_SCOPED("Arm64::CompileCode");
Expand Down
11 changes: 0 additions & 11 deletions FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,6 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {
FEX_UNREACHABLE;
}

// Returns the two general-purpose registers backing a node allocated in the
// GPR pair class: the register at the allocated index and the one directly
// after it. Asserts (via LOGMAN_THROW_AA_FMT) that the node's physical
// register is in the GPR pair class.
[[nodiscard]]
std::pair<ARMEmitter::Register, ARMEmitter::Register> GetRegPair(IR::NodeID Node) const {
  const auto Phys = GetPhys(Node);

  LOGMAN_THROW_AA_FMT(Phys.Class == IR::GPRPairClass.Val, "Unexpected Class: {}", Phys.Class);

  // The pair occupies two consecutive entries of GeneralRegisters.
  const auto First = GeneralRegisters[Phys.Reg];
  const auto Second = GeneralRegisters[Phys.Reg + 1];
  return std::make_pair(First, Second);
}

[[nodiscard]]
FEXCore::IR::RegisterClassType GetRegClass(IR::NodeID Node) const;

Expand Down Expand Up @@ -253,8 +244,6 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {
bool IsFPR(IR::NodeID Node) const;
[[nodiscard]]
bool IsGPR(IR::NodeID Node) const;
[[nodiscard]]
bool IsGPRPair(IR::NodeID Node) const;

[[nodiscard]]
ARMEmitter::ExtendedMemOperand GenerateMemOperand(uint8_t AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
Expand Down
Loading

0 comments on commit aa5d2ff

Please sign in to comment.