Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IR: Add mini native jit MIPS block profiler #18121

Merged
merged 1 commit into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions Core/MIPS/ARM64/Arm64IRAsm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,18 @@ static void ShowPC(void *membase, void *jitbase) {
}

void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
BeginWrite(GetMemoryProtectPageSize());
// This will be used as a writable scratch area, always 32-bit accessible.
const u8 *start = AlignCodePage();
if (DebugProfilerEnabled()) {
ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
Write32(0);
hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
Write32(0);
}

const u8 *disasmStart = AlignCodePage();
BeginWrite(GetMemoryProtectPageSize());

if (jo.useStaticAlloc) {
saveStaticRegisters_ = AlignCode16();
Expand All @@ -63,8 +73,6 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
regs_.EmitLoadStaticRegisters();
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
RET();

start = saveStaticRegisters_;
} else {
saveStaticRegisters_ = nullptr;
loadStaticRegisters_ = nullptr;
Expand Down Expand Up @@ -152,13 +160,17 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
MOVI2R(JITBASEREG, (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE);

LoadStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
MovFromPC(SCRATCH1);
WriteDebugPC(SCRATCH1);
outerLoopPCInSCRATCH1_ = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop_ = GetCodePtr();
SaveStaticRegisters(); // Advance can change the downcount, so must save/restore
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
LoadStaticRegisters();

Expand Down Expand Up @@ -191,6 +203,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
}

MovFromPC(SCRATCH1);
WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK);
#endif
Expand All @@ -206,7 +219,9 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {

// No block found, let's jit. We don't need to save static regs, they're all callee saved.
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
QuickCallFunction(SCRATCH1_64, &MIPSComp::JitAt);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);

// Let's just dispatch again, we'll enter the block since we know it's there.
Expand All @@ -221,6 +236,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
const uint8_t *quitLoop = GetCodePtr();
SetJumpTarget(badCoreState);

WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
SaveStaticRegisters();
RestoreRoundingMode(true);

Expand Down Expand Up @@ -251,7 +267,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {

// Leave this at the end, add more stuff above.
if (enableDisasm) {
std::vector<std::string> lines = DisassembleArm64(start, (int)(GetCodePtr() - start));
std::vector<std::string> lines = DisassembleArm64(disasmStart, (int)(GetCodePtr() - disasmStart));
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}
Expand Down
4 changes: 4 additions & 0 deletions Core/MIPS/ARM64/Arm64IRCompFPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {

auto callFuncF_F = [&](float (*func)(float)) {
regs_.FlushBeforeCall();
WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);

// It might be in a non-volatile register.
// TODO: May have to handle a transfer if SIMD here.
if (regs_.IsFPRMapped(inst.src1)) {
Expand All @@ -527,6 +529,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
if (regs_.F(inst.dest) != S0) {
fp_.FMOV(regs_.F(inst.dest), S0);
}

WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
};

switch (inst.op) {
Expand Down
4 changes: 4 additions & 0 deletions Core/MIPS/ARM64/Arm64IRCompSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
FlushAll();
SaveStaticRegisters();

WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
MOVI2R(W0, inst.constant);
Expand All @@ -229,14 +230,17 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
}
#endif

WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// This is always followed by an ExitToPC, where we check coreState.
break;

case IROp::CallReplacement:
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
QuickCallFunction(SCRATCH2_64, GetReplacementFunc(inst.constant)->replaceFunc);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
SUB(DOWNCOUNTREG, DOWNCOUNTREG, W0);
break;
Expand Down
34 changes: 34 additions & 0 deletions Core/MIPS/ARM64/Arm64IRJit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
wroteCheckedOffset = true;

WriteDebugPC(startPC);

// Check the sign bit to check if negative.
FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31);
MOVI2R(SCRATCH1, startPC);
Expand Down Expand Up @@ -129,6 +131,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
}

if (jo.enableBlocklink && jo.useBackJump) {
WriteDebugPC(startPC);

// Small blocks are common, check if it's < 32KB long.
ptrdiff_t distance = blockStart - GetCodePointer();
if (distance >= -0x8000 && distance < 0x8000) {
Expand Down Expand Up @@ -229,8 +233,10 @@ void Arm64JitBackend::CompIR_Generic(IRInst inst) {

FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
MOVI2R(X0, value);
QuickCallFunction(SCRATCH2_64, &DoIRInst);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();

// We only need to check the return value if it's a potential exit.
Expand All @@ -256,12 +262,14 @@ void Arm64JitBackend::CompIR_Interpret(IRInst inst) {
// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
if (DebugStatsEnabled()) {
MOVP2R(X0, MIPSGetName(op));
QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret);
}
MOVI2R(X0, inst.constant);
QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op));
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
}

Expand Down Expand Up @@ -354,6 +362,32 @@ void Arm64JitBackend::MovToPC(ARM64Reg r) {
STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}

// Emits code that records a constant MIPS PC into the profiler's PC slot.
// Emits nothing when no profiler PC slot has been set up (hooks_.profilerPC is null).
// The emitted code overwrites both SCRATCH1 (the PC value) and SCRATCH2 (the slot offset).
void Arm64JitBackend::WriteDebugPC(uint32_t pc) {
	if (hooks_.profilerPC == nullptr)
		return;

	// Distance of the slot from the start of the code space.
	const int slotOffset = static_cast<int>((const u8 *)hooks_.profilerPC - GetBasePtr());
	// The store below is indexed off JITBASEREG, so the offset is biased by MIPS_EMUHACK_OPCODE.
	MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + slotOffset);
	MOVI2R(SCRATCH1, pc);
	STR(SCRATCH1, JITBASEREG, SCRATCH2);
}

// Emits code that records the MIPS PC held in register r into the profiler's PC slot.
// Emits nothing when no profiler PC slot has been set up (hooks_.profilerPC is null).
// The emitted code overwrites SCRATCH2 (the slot offset); r itself is only read.
void Arm64JitBackend::WriteDebugPC(ARM64Reg r) {
	if (hooks_.profilerPC == nullptr)
		return;

	// Distance of the slot from the start of the code space.
	const int slotOffset = static_cast<int>((const u8 *)hooks_.profilerPC - GetBasePtr());
	// The store below is indexed off JITBASEREG, so the offset is biased by MIPS_EMUHACK_OPCODE.
	MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + slotOffset);
	STR(r, JITBASEREG, SCRATCH2);
}

// Emits code that stores the given profiler status into the profiler's status slot.
// Emits nothing when no status slot has been set up (hooks_.profilerStatus is null).
// The emitted code overwrites both SCRATCH1 (the status value) and SCRATCH2 (the slot offset).
void Arm64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	// Guard on the pointer that is actually used below, rather than on profilerPC.
	if (hooks_.profilerStatus) {
		int offset = (int)((const u8 *)hooks_.profilerStatus - GetBasePtr());
		// The store below is indexed off JITBASEREG, so the offset is biased by MIPS_EMUHACK_OPCODE.
		MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
		MOVI2R(SCRATCH1, (int)status);
		STR(SCRATCH1, JITBASEREG, SCRATCH2);
	}
}

void Arm64JitBackend::SaveStaticRegisters() {
if (jo.useStaticAlloc) {
QuickCallFunction(SCRATCH2_64, saveStaticRegisters_);
Expand Down
5 changes: 5 additions & 0 deletions Core/MIPS/ARM64/Arm64IRJit.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend
void UpdateRoundingMode(bool force = false);
void MovFromPC(Arm64Gen::ARM64Reg r);
void MovToPC(Arm64Gen::ARM64Reg r);
// Emits a store of the constant pc to the profiler PC slot (no-op without a profiler PC slot).
// Destroys SCRATCH1 and SCRATCH2.
void WriteDebugPC(uint32_t pc);
// Emits a store of register r to the profiler PC slot (no-op without a profiler PC slot).
// Destroys SCRATCH2.
void WriteDebugPC(Arm64Gen::ARM64Reg r);
// Emits a store of status to the profiler status slot (no-op without a profiler PC slot).
// Destroys SCRATCH1 and SCRATCH2.
void WriteDebugProfilerStatus(IRProfilerStatus status);

void SaveStaticRegisters();
void LoadStaticRegisters();
Expand Down
88 changes: 85 additions & 3 deletions Core/MIPS/IR/IRNativeCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <atomic>
#include <climits>
#include <thread>
#include "Common/Profiler/Profiler.h"
#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
Expand All @@ -31,18 +33,57 @@ namespace MIPSComp {

// Compile time flag to enable debug stats for not compiled ops.
static constexpr bool enableDebugStats = false;
// Compile time flag for enabling the simple IR jit profiler.
static constexpr bool enableDebugProfiler = false;

// Used only for debugging when enableDebug is true above.
static std::map<uint8_t, int> debugSeenNotCompiledIR;
static std::map<const char *, int> debugSeenNotCompiled;
static std::map<std::pair<uint32_t, IRProfilerStatus>, int> debugSeenPCUsage;
static double lastDebugStatsLog = 0.0;
static constexpr double debugStatsFrequency = 5.0;

static std::thread debugProfilerThread;
std::atomic<bool> debugProfilerThreadStatus = false;

// Keeps the N samples with the highest counts seen so far, ordered from the
// largest count (index 0) to the smallest.  Unused slots have a count of 0.
template <int N>
class IRProfilerTopValues {
public:
	// Offers the sample v with hit count c.  If c is larger than any stored
	// count, the sample is inserted at its ranked position and lower-ranked
	// entries move down one slot (the last one is dropped).  Samples that do
	// not beat any stored count are ignored.
	void Add(const std::pair<uint32_t, IRProfilerStatus> &v, int c) {
		for (int i = 0; i < N; ++i) {
			if (c > counts[i]) {
				// Make room at slot i instead of overwriting it, so the entry
				// that was here (and those after it) are kept in order.
				for (int j = N - 1; j > i; --j) {
					counts[j] = counts[j - 1];
					values[j] = values[j - 1];
				}
				counts[i] = c;
				values[i] = v;
				return;
			}
		}
	}

	// Hit counts, descending; parallel to values.
	int counts[N]{};
	// (pc, status) pairs matching counts by index.
	std::pair<uint32_t, IRProfilerStatus> values[N]{};
};

// Maps a profiler status to its enumerator name for logging.
// Values not covered by the switch yield "INVALID".
const char *IRProfilerStatusToString(IRProfilerStatus s) {
	const char *name = "INVALID";
	switch (s) {
	case IRProfilerStatus::NOT_RUNNING:
		name = "NOT_RUNNING";
		break;
	case IRProfilerStatus::IN_JIT:
		name = "IN_JIT";
		break;
	case IRProfilerStatus::TIMER_ADVANCE:
		name = "TIMER_ADVANCE";
		break;
	case IRProfilerStatus::COMPILING:
		name = "COMPILING";
		break;
	case IRProfilerStatus::MATH_HELPER:
		name = "MATH_HELPER";
		break;
	case IRProfilerStatus::REPLACEMENT:
		name = "REPLACEMENT";
		break;
	case IRProfilerStatus::SYSCALL:
		name = "SYSCALL";
		break;
	case IRProfilerStatus::INTERPRET:
		name = "INTERPRET";
		break;
	case IRProfilerStatus::IR_INTERPRET:
		name = "IR_INTERPRET";
		break;
	}
	return name;
}

static void LogDebugStats() {
if (!enableDebugStats)
if (!enableDebugStats && !enableDebugProfiler)
return;

double now = time_now_d();
if (now < lastDebugStatsLog + 1.0)
if (now < lastDebugStatsLog + debugStatsFrequency)
return;
lastDebugStatsLog = now;

Expand All @@ -66,16 +107,36 @@ static void LogDebugStats() {
}
debugSeenNotCompiled.clear();

IRProfilerTopValues<4> slowestPCs;
int64_t totalCount = 0;
for (auto it : debugSeenPCUsage) {
slowestPCs.Add(it.first, it.second);
totalCount += it.second;
}
debugSeenPCUsage.clear();

if (worstIROp != -1)
WARN_LOG(JIT, "Most not compiled IR op: %s (%d)", GetIRMeta((IROp)worstIROp)->name, worstIRVal);
if (worstName != nullptr)
WARN_LOG(JIT, "Most not compiled op: %s (%d)", worstName, worstVal);
if (slowestPCs.counts[0] != 0) {
for (int i = 0; i < 4; ++i) {
uint32_t pc = slowestPCs.values[i].first;
const char *status = IRProfilerStatusToString(slowestPCs.values[i].second);
const std::string label = g_symbolMap ? g_symbolMap->GetDescription(pc) : "";
WARN_LOG(JIT, "Slowest sampled PC #%d: %08x (%s)/%s (%f%%)", i, pc, label.c_str(), status, 100.0 * (double)slowestPCs.counts[i] / (double)totalCount);
}
}
}

// Reports the value of the compile-time enableDebugStats flag.
bool IRNativeBackend::DebugStatsEnabled() const {
return enableDebugStats;
}

// Reports the value of the compile-time enableDebugProfiler flag.
bool IRNativeBackend::DebugProfilerEnabled() const {
return enableDebugProfiler;
}

void IRNativeBackend::NotifyMIPSInterpret(const char *name) {
_assert_(enableDebugStats);
debugSeenNotCompiled[name]++;
Expand Down Expand Up @@ -120,6 +181,13 @@ int IRNativeBackend::ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_

// Binds the backend to the given block cache; blocks_ is initialized from the reference passed in.
IRNativeBackend::IRNativeBackend(IRBlockCache &blocks) : blocks_(blocks) {}

// Shuts down the debug profiler thread, if it was flagged as running:
// clears the run flag and then waits for the thread to finish.
IRNativeBackend::~IRNativeBackend() {
	if (!debugProfilerThreadStatus.load())
		return;

	debugProfilerThreadStatus.store(false);
	debugProfilerThread.join();
}

void IRNativeBackend::CompileIRInst(IRInst inst) {
switch (inst.op) {
case IROp::Nop:
Expand Down Expand Up @@ -421,6 +489,20 @@ void IRNativeJit::Init(IRNativeBackend &backend) {

// Wanted this to be a reference, but vtbls get in the way. Shouldn't change.
hooks_ = backend.GetNativeHooks();

if (enableDebugProfiler && hooks_.profilerPC) {
debugProfilerThreadStatus = true;
debugProfilerThread = std::thread([&] {
// Spin, spin spin... maybe could at least hook into sleeps.
while (debugProfilerThreadStatus) {
IRProfilerStatus stat = *hooks_.profilerStatus;
uint32_t pc = *hooks_.profilerPC;
if (stat != IRProfilerStatus::NOT_RUNNING && stat != IRProfilerStatus::SYSCALL) {
debugSeenPCUsage[std::make_pair(pc, stat)]++;
}
}
});
}
}

bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {
Expand All @@ -432,7 +514,7 @@ void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) {
}

void IRNativeJit::RunLoopUntil(u64 globalticks) {
if constexpr (enableDebugStats) {
if constexpr (enableDebugStats || enableDebugProfiler) {
LogDebugStats();
}

Expand Down
Loading