Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IR: Add some interpreter-only IR instructions for faster interpretation #19262

Merged
merged 4 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Core/MIPS/IR/IRFrontend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,14 +284,15 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &m
&PropagateConstants,
&PurgeTemps,
&ReduceVec4Flush,
&OptimizeLoadsAfterStores,
// &ReorderLoadStore,
// &MergeLoadStore,
// &ThreeOpToTwoOp,
};

if (opts.optimizeForInterpreter) {
// Add special passes here.
// passes.push_back(&ReorderLoadStore);
passes.push_back(&OptimizeForInterpreter);
}
if (IRApplyPasses(passes.data(), passes.size(), ir, simplified, opts))
logBlocks = 1;
Expand Down
20 changes: 17 additions & 3 deletions Core/MIPS/IR/IRInst.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "Common/CommonFuncs.h"
#include "Common/Log.h"
#include "Core/MIPS/IR/IRInst.h"
#include "Core/MIPS/MIPSDebugInterface.h"
#include "Core/HLE/ReplaceTables.h"
Expand All @@ -8,6 +9,7 @@
// _ = ignore
// G = GPR register
// C = 32-bit constant from array
// c = 8-bit constant from array
// I = immediate value from instruction
// F = FPR register, single
// V = FPR register, Vec4. Reg number always divisible by 4.
Expand All @@ -29,10 +31,13 @@ static const IRMeta irMeta[] = {
{ IROp::Or, "Or", "GGG" },
{ IROp::Xor, "Xor", "GGG" },
{ IROp::AddConst, "AddConst", "GGC" },
{ IROp::OptAddConst, "OptAddConst", "GC" },
{ IROp::SubConst, "SubConst", "GGC" },
{ IROp::AndConst, "AndConst", "GGC" },
{ IROp::OrConst, "OrConst", "GGC" },
{ IROp::XorConst, "XorConst", "GGC" },
{ IROp::OptAndConst, "OptAndConst", "GC" },
{ IROp::OptOrConst, "OptOrConst", "GC" },
{ IROp::Shl, "Shl", "GGG" },
{ IROp::Shr, "Shr", "GGG" },
{ IROp::Sar, "Sar", "GGG" },
Expand Down Expand Up @@ -115,6 +120,7 @@ static const IRMeta irMeta[] = {
{ IROp::FSatMinus1_1, "FSat(-1 - 1)", "FF" },
{ IROp::FMovFromGPR, "FMovFromGPR", "FG" },
{ IROp::FMovToGPR, "FMovToGPR", "GF" },
{ IROp::OptFMovToGPRShr8, "OptFMovToGPRShr8", "GF" },
{ IROp::FpCondFromReg, "FpCondFromReg", "_G" },
{ IROp::FpCondToReg, "FpCondToReg", "G" },
{ IROp::FpCtrlFromReg, "FpCtrlFromReg", "_G" },
Expand All @@ -128,7 +134,7 @@ static const IRMeta irMeta[] = {
{ IROp::FCmpVfpuAggregate, "FCmpVfpuAggregate", "I" },
{ IROp::Vec4Init, "Vec4Init", "Vv" },
{ IROp::Vec4Shuffle, "Vec4Shuffle", "VVs" },
{ IROp::Vec4Blend, "Vec4Blend", "VVVC" },
{ IROp::Vec4Blend, "Vec4Blend", "VVVc" },
{ IROp::Vec4Mov, "Vec4Mov", "VV" },
{ IROp::Vec4Add, "Vec4Add", "VVV" },
{ IROp::Vec4Sub, "Vec4Sub", "VVV" },
Expand Down Expand Up @@ -218,6 +224,11 @@ int IRWriter::AddConstantFloat(float value) {
return AddConstant(val);
}

// Overwrites the constant of an instruction that has already been written.
// instNumber is an index into the instructions emitted so far and must be in range
// (only checked by a debug assert).
void IRWriter::ReplaceConstant(size_t instNumber, u32 newConstant) {
	_dbg_assert_(instNumber < insts_.size());
	IRInst &target = insts_[instNumber];
	target.constant = newConstant;
}

static std::string GetGPRName(int r) {
if (r < 32) {
return currentDebugMIPS->GetRegName(0, r);
Expand Down Expand Up @@ -293,10 +304,13 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, u32 constant)
}
break;
case 'C':
snprintf(buf, bufSize, "%08x", constant);
snprintf(buf, bufSize, "0x%08x", constant);
break;
case 'c':
snprintf(buf, bufSize, "0x%02x", constant);
break;
case 'I':
snprintf(buf, bufSize, "%02x", param);
snprintf(buf, bufSize, "0x%02x", param);
break;
case 'm':
snprintf(buf, bufSize, "%d", param);
Expand Down
8 changes: 8 additions & 0 deletions Core/MIPS/IR/IRInst.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
// even be directly JIT-ed, but the gains will probably be tiny over our older direct
// MIPS->target JITs.

// Ops beginning with "Opt" are specialized for IR Interpreter use. These will not be produced
// for the IR JITs.

enum class IROp : uint8_t {
SetConst,
SetConstF,
Expand All @@ -33,11 +36,14 @@ enum class IROp : uint8_t {
Xor,

AddConst,
OptAddConst,
SubConst,

AndConst,
OrConst,
XorConst,
OptAndConst,
OptOrConst,

Shl,
Shr,
Expand Down Expand Up @@ -133,6 +139,7 @@ enum class IROp : uint8_t {

FMovFromGPR,
FMovToGPR,
OptFMovToGPRShr8,

FSat0_1,
FSatMinus1_1,
Expand Down Expand Up @@ -391,6 +398,7 @@ class IRWriter {
void Clear() {
insts_.clear();
}
void ReplaceConstant(size_t instNumber, u32 newConstant);

const std::vector<IRInst> &GetInstructions() const { return insts_; }

Expand Down
21 changes: 20 additions & 1 deletion Core/MIPS/IR/IRInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,24 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
case IROp::AddConst:
mips->r[inst->dest] = mips->r[inst->src1] + inst->constant;
break;
case IROp::OptAddConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
mips->r[inst->dest] += inst->constant;
break;
case IROp::SubConst:
mips->r[inst->dest] = mips->r[inst->src1] - inst->constant;
break;
case IROp::AndConst:
mips->r[inst->dest] = mips->r[inst->src1] & inst->constant;
break;
case IROp::OptAndConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
mips->r[inst->dest] &= inst->constant;
break;
case IROp::OrConst:
mips->r[inst->dest] = mips->r[inst->src1] | inst->constant;
break;
case IROp::OptOrConst:
mips->r[inst->dest] |= inst->constant;
break;
case IROp::XorConst:
mips->r[inst->dest] = mips->r[inst->src1] ^ inst->constant;
break;
Expand Down Expand Up @@ -431,6 +440,8 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {

case IROp::Vec2Pack31To16:
{
// Used in Tekken 6

u32 val = (mips->fi[inst->src1] >> 15) & 0xFFFF;
val |= (mips->fi[inst->src1 + 1] << 1) & 0xFFFF0000;
mips->fi[inst->dest] = val;
Expand All @@ -451,6 +462,8 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {

case IROp::Vec4Pack31To8:
{
// Used in Tekken 6

// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
// pshufb or SSE4 instructions can be used instead.
u32 val = (mips->fi[inst->src1] >> 23) & 0xFF;
Expand Down Expand Up @@ -987,7 +1000,13 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
case IROp::FMovToGPR:
memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4);
break;

case IROp::OptFMovToGPRShr8:
{
u32 temp;
memcpy(&temp, &mips->f[inst->src1], 4);
mips->r[inst->dest] = temp >> 8;
break;
}
case IROp::ExitToConst:
return inst->constant;

Expand Down
8 changes: 6 additions & 2 deletions Core/MIPS/IR/IRJit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,15 +255,19 @@ void IRJit::RunLoopUntil(u64 globalticks) {
u32 opcode = inst & 0xFF000000;
if (opcode == MIPS_EMUHACK_OPCODE) {
u32 offset = inst & 0x00FFFFFF; // Alternatively, inst - opcode
const IRInst *instPtr = blocks_.GetArenaPtr() + offset;
_dbg_assert_(instPtr->op == IROp::Downcount);
mips->downcount -= instPtr->constant;
instPtr++;
#ifdef IR_PROFILING
IRBlock *block = blocks_.GetBlock(blocks_.GetBlockNumFromOffset(offset));
TimeSpan span;
mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
mips->pc = IRInterpret(mips, instPtr);
int64_t elapsedNanos = span.ElapsedNanos();
block->profileStats_.executions += 1;
block->profileStats_.totalNanos += elapsedNanos;
#else
mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
mips->pc = IRInterpret(mips, instPtr);
#endif
// Note: this will "jump to zero" on a badly constructed block missing exits.
if (!Memory::IsValid4AlignedAddress(mips->pc)) {
Expand Down
107 changes: 107 additions & 0 deletions Core/MIPS/IR/IRPassSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2149,3 +2149,110 @@ bool ReduceVec4Flush(const IRWriter &in, IRWriter &out, const IROptions &opts) {
}
return logBlocks;
}

// This optimizes away redundant loads-after-stores, which are surprisingly not that uncommon.
bool OptimizeLoadsAfterStores(const IRWriter &in, IRWriter &out, const IROptions &opts) {
CONDITIONAL_DISABLE;
// This tells us to skip an AND op that has been optimized out.
// Maybe we could skip multiple, but that'd slow things down and is pretty uncommon.
int nextSkip = -1;

bool logBlocks = false;
for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) {
IRInst inst = in.GetInstructions()[i];

// Just copy the last instruction.
if (i == n - 1) {
out.Write(inst);
break;
}

out.Write(inst);

IRInst next = in.GetInstructions()[i + 1];
switch (inst.op) {
case IROp::Store32:
if (next.op == IROp::Load32 &&
next.constant == inst.constant &&
next.dest == inst.src3 &&
next.src1 == inst.src1) {
// The upcoming load is completely redundant.
// Skip it.
i++;
}
break;
default:
break;
}
}

return logBlocks;
}

bool OptimizeForInterpreter(const IRWriter &in, IRWriter &out, const IROptions &opts) {
CONDITIONAL_DISABLE;
// This tells us to skip an AND op that has been optimized out.
// Maybe we could skip multiple, but that'd slow things down and is pretty uncommon.
int nextSkip = -1;

bool logBlocks = false;
// We also move the downcount to the top so the interpreter can assume that it's there.
bool foundDowncount = false;
out.Write(IROp::Downcount);

for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) {
IRInst inst = in.GetInstructions()[i];

bool last = i == n - 1;

// Specialize some instructions.
switch (inst.op) {
case IROp::Downcount:
if (!foundDowncount) {
// Move the value into the initial Downcount.
foundDowncount = true;
out.ReplaceConstant(0, inst.constant);
} else {
// Already had a downcount. Let's just re-emit it.
out.Write(inst);
}
break;
case IROp::AddConst:
if (inst.src1 == inst.dest) {
inst.op = IROp::OptAddConst;
}
out.Write(inst);
break;
case IROp::AndConst:
if (inst.src1 == inst.dest) {
inst.op = IROp::OptAndConst;
}
out.Write(inst);
break;
case IROp::OrConst:
if (inst.src1 == inst.dest) {
inst.op = IROp::OptOrConst;
}
out.Write(inst);
break;
case IROp::FMovToGPR:
if (!last) {
IRInst next = in.GetInstructions()[i + 1];
if (next.op == IROp::ShrImm && next.src2 == 8 && next.src1 == next.dest && next.src1 == inst.dest) {
// Heavily used when writing display lists.
inst.op = IROp::OptFMovToGPRShr8;
i++; // Skip the next instruction.
}
out.Write(inst);
} else {
out.Write(inst);
}
break;
default:
out.Write(inst);
break;
}
}

return logBlocks;
}
3 changes: 3 additions & 0 deletions Core/MIPS/IR/IRPassSimplify.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ bool ReorderLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool MergeLoadStore(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool ReduceVec4Flush(const IRWriter &in, IRWriter &out, const IROptions &opts);

bool OptimizeLoadsAfterStores(const IRWriter &in, IRWriter &out, const IROptions &opts);
bool OptimizeForInterpreter(const IRWriter &in, IRWriter &out, const IROptions &opts);
Loading