Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PPU LLVM Cache Fix #14427

Merged
merged 7 commits into from
Aug 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions rpcs3/Emu/Cell/PPUModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1658,6 +1658,11 @@ std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_lo

void ppu_unload_prx(const lv2_prx& prx)
{
if (prx.segs[0].ptr != vm::base(prx.segs[0].addr))
{
return;
}

std::unique_lock lock(g_fxo->get<ppu_linkage_info>().mutex, std::defer_lock);

// Clean linkage info
Expand Down Expand Up @@ -1708,10 +1713,7 @@ void ppu_unload_prx(const lv2_prx& prx)
{
if (!seg.size) continue;

if (seg.ptr == vm::base(seg.addr))
{
vm::dealloc(seg.addr, vm::main);
}
vm::dealloc(seg.addr, vm::main);

const std::string hash_seg = fmt::format("%s-%u", hash, &seg - prx.segs.data());

Expand Down Expand Up @@ -2224,16 +2226,16 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str

void init_fxo_for_exec(utils::serial* ar, bool full);
init_fxo_for_exec(ar, false);

liblv2_begin = 0;
liblv2_end = 0;
}
else
{
g_ps3_process_info = old_process_info;
Emu.ConfigurePPUCache();
}

liblv2_begin = 0;
liblv2_end = 0;

if (!load_libs.empty())
{
for (const auto& name : load_libs)
Expand Down
20 changes: 11 additions & 9 deletions rpcs3/Emu/Cell/PPUThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3199,7 +3199,7 @@ extern fs::file make_file_view(fs::file&& _file, u64 offset)

extern void ppu_finalize(const ppu_module& info)
{
if (info.name.empty())
if (!info.cache.empty())
{
// Don't remove main module from memory
return;
Expand All @@ -3226,7 +3226,7 @@ extern void ppu_finalize(const ppu_module& info)
fmt::append(cache_path, "ppu-%s-%s/", fmt::base57(info.sha1), info.path.substr(info.path.find_last_of('/') + 1));

#ifdef LLVM_AVAILABLE
g_fxo->get<jit_module_manager>().remove(cache_path + info.name + "_" + std::to_string(info.segs[0].addr));
g_fxo->get<jit_module_manager>().remove(cache_path + info.name + "_" + std::to_string(std::bit_cast<usz>(info.segs[0].ptr)));
#endif
}

Expand Down Expand Up @@ -3452,7 +3452,6 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
{
obj.clear(), src.close(); // Clear decrypted file and elf object memory
ppu_initialize(*prx);
ppu_unload_prx(*prx);
ppu_finalize(*prx);
continue;
}
Expand Down Expand Up @@ -3572,6 +3571,11 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
break;
}

// Skip this entry when its SHA-1 is identical to main_module's.
// This must be std::memcmp: std::memcpy returns its destination pointer (never null),
// so comparing its result with 0 could never be true, and it overwrote main_module.sha1.
if (std::memcmp(main_module.sha1, _main.sha1, sizeof(_main.sha1)) == 0)
{
continue;
}

if (!_main.analyse(0, _main.elf_entry, _main.seg0_code_end, _main.applied_pathes, [](){ return Emu.IsStopped(); }))
{
break;
Expand Down Expand Up @@ -3641,7 +3645,7 @@ extern void ppu_initialize()
const std::string firmware_sprx_path = vfs::get("/dev_flash/sys/external/");

// If empty we have no indication for firmware cache state, check everything
bool compile_fw = true;
bool compile_fw = !Emu.IsVsh();

idm::select<lv2_obj, lv2_prx>([&](u32, lv2_prx& _module)
{
Expand Down Expand Up @@ -3687,7 +3691,7 @@ extern void ppu_initialize()

const std::string mount_point = vfs::get("/dev_flash/");

bool dev_flash_located = !Emu.GetCat().ends_with('P') && Emu.IsPathInsideDir(Emu.GetBoot(), mount_point);
bool dev_flash_located = !Emu.GetCat().ends_with('P') && Emu.IsPathInsideDir(Emu.GetBoot(), mount_point) && g_cfg.core.ppu_llvm_precompilation;

if (compile_fw || dev_flash_located)
{
Expand All @@ -3699,8 +3703,6 @@ extern void ppu_initialize()
{
// Check if cache exists for this infinitesimally small prx
dev_flash_located = ppu_initialize(*prx, true);
idm::remove<lv2_obj, lv2_prx>(idm::last_id());
ppu_unload_prx(*prx);
}
}

Expand Down Expand Up @@ -3812,7 +3814,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
// Get cache path for this executable
std::string cache_path;

if (info.name.empty())
if (!info.cache.empty())
{
cache_path = info.cache;
}
Expand Down Expand Up @@ -3862,7 +3864,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
};

// Permanently loaded compiled PPU modules (name -> data)
jit_module& jit_mod = g_fxo->get<jit_module_manager>().get(cache_path + info.name + "_" + std::to_string(info.segs[0].addr));
jit_module& jit_mod = g_fxo->get<jit_module_manager>().get(cache_path + info.name + "_" + std::to_string(std::bit_cast<usz>(info.segs[0].ptr)));

// Compiler instance (deferred initialization)
std::shared_ptr<jit_compiler>& jit = jit_mod.pjit;
Expand Down
18 changes: 15 additions & 3 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3617,11 +3617,11 @@ void PPUTranslator::LWZ(ppu_opcode_t op)
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, true, false); // Either exact MMIO address or MMIO base with completing s16 address offset

if (m_may_be_mmio && !op.simm16)
if (m_may_be_mmio)
{
struct instructions_data
{
be_t<u32> insts[2];
be_t<u32> insts[3];
};

// Quick invalidation: expect exact MMIO address, so if the register is being reused with different offset than it's likely not MMIO
Expand All @@ -3637,6 +3637,12 @@ void PPUTranslator::LWZ(ppu_opcode_t op)
continue;
}

if (op.simm16 && spu_thread::test_is_problem_state_register_offset(test_op.uimm16, true, false))
{
// Found register reuse with different MMIO offset
continue;
}

switch (g_ppu_itype.decode(inst))
{
case ppu_itype::LWZ:
Expand Down Expand Up @@ -3714,7 +3720,7 @@ void PPUTranslator::STW(ppu_opcode_t op)
m_may_be_mmio &= (op.ra != 1u && op.ra != 13u); // Stack register and TLS address register are unlikely to be used in MMIO address calculation
m_may_be_mmio &= op.simm16 == 0 || spu_thread::test_is_problem_state_register_offset(op.uimm16, false, true); // Either exact MMIO address or MMIO base with completing s16 address offset

if (m_may_be_mmio && !op.simm16)
if (m_may_be_mmio)
{
struct instructions_data
{
Expand All @@ -3734,6 +3740,12 @@ void PPUTranslator::STW(ppu_opcode_t op)
continue;
}

if (op.simm16 && spu_thread::test_is_problem_state_register_offset(test_op.uimm16, false, true))
{
// Found register reuse with different MMIO offset
continue;
}

switch (g_ppu_itype.decode(inst))
{
case ppu_itype::LWZ:
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/lv2/sys_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ error_code sys_memory_allocate(cpu_thread& cpu, u32 size, u64 flags, vm::ptr<u32

if (alloc_addr)
{
sys_memory.notice("sys_mmapper_search_and_map(): Allocated 0x%x address (size=0x%x)", addr, size);
sys_memory.notice("sys_memory_allocate(): Allocated 0x%x address (size=0x%x)", addr, size);

vm::lock_sudo(addr, size);
cpu.check_state();
Expand Down
5 changes: 2 additions & 3 deletions rpcs3/Emu/Memory/vm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,11 @@ namespace vm
range_lock->store(to_store);
}

for (u64 i = 0, to_clear = umax;; i++)
for (u64 i = 0;; i++)
{
const u64 is_share = g_shmem[begin >> 16].load();
to_clear &= get_range_lock_bits(true);

const u64 busy = for_all_range_locks(to_clear, [&](u64 addr_exec, u32 size_exec)
const u64 busy = for_all_range_locks(get_range_lock_bits(true), [&](u64 addr_exec, u32 size_exec)
{
u64 addr = begin;

Expand Down
123 changes: 63 additions & 60 deletions rpcs3/Emu/System.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2941,11 +2941,72 @@ void Emulator::Kill(bool allow_autoexit, bool savestate)
ar.set_reading_state();
}

// Final termination from main thread (move the last ownership of join thread in order to destroy it)
CallFromMainThread([join_thread = std::move(join_thread), allow_autoexit, this]() mutable
// Log additional debug information - do not do it on the main thread due to the concern of halting UI events

if (g_tty && sys_log.notice)
{
// Write merged TTY output after emulation has been safely stopped

if (usz attempted_read_size = utils::sub_saturate<usz>(g_tty.pos(), m_tty_file_init_pos))
{
if (fs::file tty_read_fd{fs::get_cache_dir() + "TTY.log"})
{
// Enforce an arbitrary limit for now to avoid OOM in case the guest code has bombarded TTY
// 3MB, this should be enough
constexpr usz c_max_tty_spill_size = 0x30'0000;

std::string tty_buffer(std::min<usz>(attempted_read_size, c_max_tty_spill_size), '\0');
tty_buffer.resize(tty_read_fd.read_at(m_tty_file_init_pos, tty_buffer.data(), tty_buffer.size()));
tty_read_fd.close();

if (!tty_buffer.empty())
{
// Mark start and end very clearly with RPCS3 put in it
sys_log.notice("\nAccumulated RPCS3 TTY:\n\n\n%s\n\n\nEnd RPCS3 TTY Section.\n", tty_buffer);
}
}
}
}

if (g_cfg.core.spu_debug && sys_log.notice)
{
const std::string cache_path = rpcs3::cache::get_ppu_cache();

if (fs::file spu_log{cache_path + "/spu.log"})
{
// 96MB limit, this may be a lot but this only has an effect when enabling the debug option
constexpr usz c_max_spu_log_spill_size = 0x600'0000;
const usz total_size = spu_log.size();

std::string log_buffer(std::min<usz>(spu_log.size(), c_max_spu_log_spill_size), '\0');
log_buffer.resize(spu_log.read(log_buffer.data(), log_buffer.size()));
spu_log.close();

if (!log_buffer.empty())
{
usz to_remove = 0;
usz part_ctr = 1;

for (std::string_view not_logged = log_buffer; !not_logged.empty(); part_ctr++, not_logged.remove_prefix(to_remove))
{
std::string_view to_log = not_logged;
to_log = to_log.substr(0, 0x2'0000);
to_log = to_log.substr(0, utils::add_saturate<usz>(to_log.rfind("\n========== SPU BLOCK"sv), 1));
to_remove = to_log.size();

// Cannot log it all at once due to technical reasons, so split it into parts of at most 128KB (0x2'0000 bytes) of whole functions
// Assume the block prefix exists because it is created by RPCS3 (or log it in an ugly manner if it does not exist)
sys_log.notice("Logging spu.log part %u:\n\n%s\n", part_ctr, to_log);
}

sys_log.notice("End spu.log (%u bytes)", total_size);
}
}
}

// Final termination from main thread (move the last ownership of join thread in order to destroy it)
CallFromMainThread([join_thread = std::move(join_thread), allow_autoexit, this]() mutable
{
cpu_thread::cleanup();

initialize_timebased_time(0, true);
Expand Down Expand Up @@ -3006,64 +3067,6 @@ void Emulator::Kill(bool allow_autoexit, bool savestate)
m_state = system_state::stopped;
GetCallbacks().on_stop();

if (g_tty && sys_log.notice)
{
// Write merged TTY output after emulation has been safely stopped

if (usz attempted_read_size = utils::sub_saturate<usz>(g_tty.pos(), m_tty_file_init_pos))
{
if (fs::file tty_read_fd{fs::get_cache_dir() + "TTY.log"})
{
// Enforce an arbitrary limit for now to avoid OOM in case the guest code has bombarded TTY
// 16MB, this should be enough
constexpr usz c_max_tty_spill_size = 0x10'0000;

std::string tty_buffer(std::min<usz>(attempted_read_size, c_max_tty_spill_size), '\0');
tty_buffer.resize(tty_read_fd.read_at(m_tty_file_init_pos, tty_buffer.data(), tty_buffer.size()));
tty_read_fd.close();

if (!tty_buffer.empty())
{
// Mark start and end very clearly with RPCS3 put in it
sys_log.notice("\nAccumulated RPCS3 TTY:\n\n\n%s\n\n\nEnd RPCS3 TTY Section.\n", tty_buffer);
}
}
}
}

if (g_cfg.core.spu_debug && sys_log.notice)
{
if (fs::file spu_log{cache_path + "/spu.log"})
{
// 96MB limit, this may be a lot but this only has an effect when enabling the debug option
constexpr usz c_max_tty_spill_size = 0x60'0000;

std::string log_buffer(std::min<usz>(spu_log.size(), c_max_tty_spill_size), '\0');
log_buffer.resize(spu_log.read(log_buffer.data(), log_buffer.size()));
spu_log.close();

if (!log_buffer.empty())
{
usz to_remove = 0;
usz part_ctr = 1;

for (std::string_view not_logged = log_buffer; !not_logged.empty(); part_ctr++, not_logged.remove_prefix(to_remove))
{
std::string_view to_log = not_logged;
to_log = to_log.substr(0, 0x8'0000);
to_log = to_log.substr(0, utils::add_saturate<usz>(to_log.rfind("\n========== SPU BLOCK"sv), 1));
to_remove = to_log.size();

// Cannot log it all at once due to technical reasons, split it to 8MB at maximum of whole functions
// Assume the block prefix exists because it is created by RPCS3 (or log it in an ugly manner if it does not exist)
sys_log.notice("Logging spu.log part %u:\n\n%s\n", part_ctr, to_log);
}

sys_log.notice("End spu.log");
}
}
}

// Always Enable display sleep, not only if it was prevented.
enable_display_sleep();

Expand Down