diff --git a/libswirl/build.h b/libswirl/build.h index 6c06da4abd..46c5642a2b 100644 --- a/libswirl/build.h +++ b/libswirl/build.h @@ -347,7 +347,7 @@ //DC : 16 mb ram, 8 mb vram, 2 mb aram, 2 mb bios, 128k flash #define RAM_SIZE (16*1024*1024) #define VRAM_SIZE (8*1024*1024) - #define ARAM_SIZE (2*1024*1024) + #define INTERNAL_ARAM_SIZE (2*1024*1024) #define BIOS_SIZE (2*1024*1024) #define FLASH_SIZE (128*1024) @@ -362,7 +362,7 @@ //Devkit : 32 mb ram, 8? mb vram, 2? mb aram, 2? mb bios, ? flash #define RAM_SIZE (32*1024*1024) #define VRAM_SIZE (8*1024*1024) - #define ARAM_SIZE (2*1024*1024) + #define INTERNAL_ARAM_SIZE (2*1024*1024) #define BIOS_SIZE (2*1024*1024) #define FLASH_SIZE (128*1024) @@ -375,7 +375,7 @@ //Naomi : 32 mb ram, 16 mb vram, 8 mb aram, 2 mb bios, ? flash #define RAM_SIZE (32*1024*1024) #define VRAM_SIZE (16*1024*1024) - #define ARAM_SIZE (8*1024*1024) + #define INTERNAL_ARAM_SIZE (8*1024*1024) #define BIOS_SIZE (2*1024*1024) #define BBSRAM_SIZE (32*1024) @@ -388,7 +388,7 @@ //Naomi2 : 32 mb ram, 16 mb vram, 8 mb aram, 2 mb bios, ? 
flash #define RAM_SIZE (32*1024*1024) #define VRAM_SIZE (16*1024*1024) - #define ARAM_SIZE (8*1024*1024) + #define INTERNAL_ARAM_SIZE (8*1024*1024) #define BIOS_SIZE (2*1024*1024) #define BBSRAM_SIZE (32*1024) @@ -403,7 +403,7 @@ //Atomiswave : 16 mb ram, 8 mb vram, 8 mb aram, 128kb bios on flash, 128kb battery-backed ram #define RAM_SIZE (16*1024*1024) #define VRAM_SIZE (8*1024*1024) - #define ARAM_SIZE (8*1024*1024) + #define INTERNAL_ARAM_SIZE (8*1024*1024) #define BIOS_SIZE (128*1024) #define BBSRAM_SIZE (128*1024) @@ -417,7 +417,6 @@ #define RAM_MASK (RAM_SIZE-1) #define VRAM_MASK (VRAM_SIZE-1) -#define ARAM_MASK (ARAM_SIZE-1) #define BIOS_MASK (BIOS_SIZE-1) #ifdef FLASH_SIZE diff --git a/libswirl/core.mk b/libswirl/core.mk index 140df971f7..9ea2fab1b8 100644 --- a/libswirl/core.mk +++ b/libswirl/core.mk @@ -41,7 +41,7 @@ ifdef X64_REC endif ifdef ARM32_REC - RZDCY_MODULES += jit/backend/arm32/ jit/emitter/arm/ + RZDCY_MODULES += jit/backend/arm32/ jit/emitter/arm/ deps/vixl/ deps/vixl/aarch32/ endif ifdef ARM64_REC diff --git a/libswirl/deps/vixl/aarch32/assembler-aarch32.h b/libswirl/deps/vixl/aarch32/assembler-aarch32.h index bb7df84046..833f0aef3c 100644 --- a/libswirl/deps/vixl/aarch32/assembler-aarch32.h +++ b/libswirl/deps/vixl/aarch32/assembler-aarch32.h @@ -43,7 +43,7 @@ class Assembler : public internal::AssemblerBase { bool allow_unpredictable_; bool allow_strongly_discouraged_; - protected: + public: void EmitT32_16(uint16_t instr); void EmitT32_32(uint32_t instr); void EmitA32(uint32_t instr); diff --git a/libswirl/deps/vixl/aarch32/macro-assembler-aarch32.h b/libswirl/deps/vixl/aarch32/macro-assembler-aarch32.h index 115d4d8439..5d8e84b7e1 100644 --- a/libswirl/deps/vixl/aarch32/macro-assembler-aarch32.h +++ b/libswirl/deps/vixl/aarch32/macro-assembler-aarch32.h @@ -411,6 +411,19 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { pool_manager_.Bind(this, label, GetCursorOffset()); } + // ADDED for REICAST -- allows to 
bind external functions + void BindToOffset(Label* label, ptrdiff_t offset) { + // Assert that we have the correct buffer alignment. + if (IsUsingT32()) { + VIXL_ASSERT(GetBuffer()->Is16bitAligned()); + } + else { + VIXL_ASSERT(GetBuffer()->Is32bitAligned()); + } + + pool_manager_.Bind(this, label, offset); + } + void RegisterLiteralReference(RawLiteral* literal) { if (literal->IsManuallyPlaced()) return; RegisterForwardReference(literal); diff --git a/libswirl/deps/vixl/code-buffer-vixl.cc b/libswirl/deps/vixl/code-buffer-vixl.cc index 0fdd373fe9..eb75e1359c 100644 --- a/libswirl/deps/vixl/code-buffer-vixl.cc +++ b/libswirl/deps/vixl/code-buffer-vixl.cc @@ -30,6 +30,7 @@ extern "C" { #include "code-buffer-vixl.h" #include "utils-vixl.h" +#include namespace vixl { @@ -103,12 +104,13 @@ void CodeBuffer::SetWritable() { } #endif - +// MODIFIED FOR REICAST // ANDROID armhf build void CodeBuffer::EmitString(const char* string) { VIXL_ASSERT(HasSpaceFor(strlen(string) + 1)); char* dst = reinterpret_cast(cursor_); dirty_ = true; - char* null_char = stpcpy(dst, string); + //char* null_char = stpcpy(dst, string); + char* null_char = strcpy(dst, string) + strlen(string); cursor_ = reinterpret_cast(null_char) + 1; } diff --git a/libswirl/deps/vixl/globals-vixl.h b/libswirl/deps/vixl/globals-vixl.h index a92bd91fbe..9a2687091e 100644 --- a/libswirl/deps/vixl/globals-vixl.h +++ b/libswirl/deps/vixl/globals-vixl.h @@ -271,6 +271,8 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {} #define VIXL_INCLUDE_TARGET_AARCH64 #endif +#define VIXL_INCLUDE_TARGET_A32 + #if defined(VIXL_INCLUDE_TARGET_A32) && defined(VIXL_INCLUDE_TARGET_T32) #define VIXL_INCLUDE_TARGET_AARCH32 #elif defined(VIXL_INCLUDE_TARGET_A32) diff --git a/libswirl/deps/vixl/pool-manager-impl.h b/libswirl/deps/vixl/pool-manager-impl.h index c49b643fc3..a29bed6e27 100644 --- a/libswirl/deps/vixl/pool-manager-impl.h +++ b/libswirl/deps/vixl/pool-manager-impl.h @@ -452,15 +452,18 @@ T 
PoolManager::Bind(MacroAssemblerInterface* masm, min_location = existing_object->min_location_; } - // Align if needed, and add necessary padding to reach the min_location_. - T aligned_location = AlignUp(location, alignment); - masm->EmitNopBytes(aligned_location - location); - location = aligned_location; - while (location < min_location) { - masm->EmitNopBytes(alignment); - location += alignment; + // hack-fix for negative offsets when aligned + if (alignment != 1) { + // Align if needed, and add necessary padding to reach the min_location_. + T aligned_location = AlignUp(location, alignment); + masm->EmitNopBytes(aligned_location - location); + location = aligned_location; + while (location < min_location) { + masm->EmitNopBytes(alignment); + location += alignment; + } } - + object->SetLocation(masm->AsAssemblerBase(), location); object->MarkBound(); diff --git a/libswirl/gui/gui.cpp b/libswirl/gui/gui.cpp index d2ddff176a..597d2d0fd4 100644 --- a/libswirl/gui/gui.cpp +++ b/libswirl/gui/gui.cpp @@ -557,7 +557,7 @@ struct ReicastUI_impl : GUI { // Exit to main menu gui_state = Main; game_started = false; - virtualDreamcast.release(); + virtualDreamcast.reset(); cfgSetVirtual("config", "image", ""); } @@ -717,7 +717,7 @@ struct ReicastUI_impl : GUI { { gui_state = Main; game_started = false; - virtualDreamcast.release(); + virtualDreamcast.reset(); cfgSetVirtual("config", "image", ""); switch (rc) { diff --git a/libswirl/gui/gui_settings_advanced.cpp b/libswirl/gui/gui_settings_advanced.cpp index 269fc186af..62a411782a 100644 --- a/libswirl/gui/gui_settings_advanced.cpp +++ b/libswirl/gui/gui_settings_advanced.cpp @@ -10,19 +10,19 @@ void gui_settings_advanced() if (ImGui::BeginTabItem("Advanced")) { ImGui::PushStyleVar(ImGuiStyleVar_FramePadding, normal_padding); - if (ImGui::CollapsingHeader("CPU Mode", ImGuiTreeNodeFlags_DefaultOpen)) + if (ImGui::CollapsingHeader("MCPU Mode", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Columns(2, "cpu_modes", false); - 
ImGui::RadioButton("Dynarec", &dynarec_enabled, 1); + ImGui::Columns(2, "sh4_modes", false); + ImGui::RadioButton("MCPU Dynarec", &dynarec_enabled, 1); ImGui::SameLine(); gui_ShowHelpMarker("Use the dynamic recompiler. Recommended in most cases"); ImGui::NextColumn(); - ImGui::RadioButton("Interpreter", &dynarec_enabled, 0); + ImGui::RadioButton("MCPU Interpreter", &dynarec_enabled, 0); ImGui::SameLine(); gui_ShowHelpMarker("Use the interpreter. Very slow but may help in case of a dynarec problem"); ImGui::Columns(1, NULL, false); } - if (ImGui::CollapsingHeader("Dynarec Options", dynarec_enabled ? ImGuiTreeNodeFlags_DefaultOpen : ImGuiTreeNodeFlags_None)) + if (ImGui::CollapsingHeader("SH4 Dynarec Options", dynarec_enabled ? ImGuiTreeNodeFlags_DefaultOpen : ImGuiTreeNodeFlags_None)) { ImGui::Checkbox("Safe Mode", &settings.dynarec.safemode); ImGui::SameLine(); @@ -57,7 +57,31 @@ void gui_settings_advanced() ImGui::SameLine(); gui_ShowHelpMarker("How to detect self-modifying code. Full check recommended"); } - if (ImGui::CollapsingHeader("Other", ImGuiTreeNodeFlags_DefaultOpen)) + if (ImGui::CollapsingHeader("SCPU Mode", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Columns(2, "arm7_modes", false); + ImGui::RadioButton("SCPU Dynarec", &settings.dynarec.ScpuEnable, 1); + ImGui::SameLine(); + gui_ShowHelpMarker("Use the ARM7 dynamic recompiler. Recommended in most cases"); + ImGui::NextColumn(); + ImGui::RadioButton("SCPU Interpreter", &settings.dynarec.ScpuEnable, 0); + ImGui::SameLine(); + gui_ShowHelpMarker("Use the ARM7 interpreter. Very slow but may help in case of a dynarec problem"); + ImGui::Columns(1, NULL, false); + } + if (ImGui::CollapsingHeader("DSP Mode", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Columns(2, "dsp_modes", false); + ImGui::RadioButton("DSP Dynarec", &settings.dynarec.DspEnable, 1); + ImGui::SameLine(); + gui_ShowHelpMarker("Use the DSP dynamic recompiler. 
Recommended in most cases"); + ImGui::NextColumn(); + ImGui::RadioButton("DSP Interpreter", &settings.dynarec.DspEnable, 0); + ImGui::SameLine(); + gui_ShowHelpMarker("Use the DSP interpreter. Very slow but may help in case of a DSP dynarec problem"); + ImGui::Columns(1, NULL, false); + } + if (ImGui::CollapsingHeader("Other", ImGuiTreeNodeFlags_DefaultOpen)) { #ifndef _ANDROID ImGui::Checkbox("Serial Console", &settings.debug.SerialConsole); diff --git a/libswirl/hw/aica/aica.cpp b/libswirl/hw/aica/aica.cpp index 2feed6fa04..8b13789179 100644 --- a/libswirl/hw/aica/aica.cpp +++ b/libswirl/hw/aica/aica.cpp @@ -1,235 +1 @@ -#include "aica.h" -#include "sgc_if.h" -#include "aica_mmio.h" -#include "aica_mem.h" -#include -#include "hw/holly/holly_intc.h" -#include "hw/holly/sb.h" -#define SH4_IRQ_BIT (1<<(holly_SPU_IRQ&255)) - -CommonData_struct* CommonData; -DSPData_struct* DSPData; -InterruptInfo* MCIEB; -InterruptInfo* MCIPD; -InterruptInfo* MCIRE; -InterruptInfo* SCIEB; -InterruptInfo* SCIPD; -InterruptInfo* SCIRE; - -//Interrupts -//arm side -u32 GetL(u32 witch) -{ - if (witch>7) - witch=7; //higher bits share bit 7 - - u32 bit=1<SCILV0 & bit) - rv=1; - - if (CommonData->SCILV1 & bit) - rv|=2; - - if (CommonData->SCILV2 & bit) - rv|=4; - - return rv; -} -void update_arm_interrupts() -{ - u32 p_ints=SCIEB->full & SCIPD->full; - - u32 Lval=0; - if (p_ints) - { - u32 bit_value=1;//first bit - //scan all interrupts , lo to hi bit.I assume low bit ints have higher priority over others - for (u32 i=0;i<11;i++) - { - if (p_ints & bit_value) - { - //for the first one , Set the L reg & exit - Lval=GetL(i); - break; - } - bit_value<<=1; //next bit - } - } - - libARM_InterruptChange(p_ints,Lval); -} - -//sh4 side -void UpdateSh4Ints() -{ - u32 p_ints = MCIEB->full & MCIPD->full; - if (p_ints) - { - if ((SB_ISTEXT & SH4_IRQ_BIT )==0) - { - //if no interrupt is already pending then raise one :) - asic_RaiseInterrupt(holly_SPU_IRQ); - } - } - else - { - if 
(SB_ISTEXT&SH4_IRQ_BIT) - { - asic_CancelInterrupt(holly_SPU_IRQ); - } - } - -} - - -AicaTimer timers[3]; - -void libAICA_TimeStep() -{ - for (int i=0;i<3;i++) - timers[i].StepTimer(1); - - SCIPD->SAMPLE_DONE=1; - - if (settings.aica.NoBatch) - AICA_Sample(); - - //Make sure sh4/arm interrupt system is up to date :) - update_arm_interrupts(); - UpdateSh4Ints(); -} - -//Memory i/o -template -void WriteAicaReg(u32 reg,u32 data) -{ - switch (reg) - { - case SCIPD_addr: - verify(sz!=1); - if (data & (1<<5)) - { - SCIPD->SCPU=1; - update_arm_interrupts(); - } - //Read only - return; - - case SCIRE_addr: - { - verify(sz!=1); - SCIPD->full&=~(data /*& SCIEB->full*/ ); //is the & SCIEB->full needed ? doesn't seem like it - data=0;//Write only - update_arm_interrupts(); - } - break; - - case MCIPD_addr: - if (data & (1<<5)) - { - verify(sz!=1); - MCIPD->SCPU=1; - UpdateSh4Ints(); - } - //Read only - return; - - case MCIRE_addr: - { - verify(sz!=1); - MCIPD->full&=~data; - UpdateSh4Ints(); - //Write only - } - break; - - case TIMER_A: - WriteMemArr(aica_reg,reg,data,sz); - timers[0].RegisterWrite(); - break; - - case TIMER_B: - WriteMemArr(aica_reg,reg,data,sz); - timers[1].RegisterWrite(); - break; - - case TIMER_C: - WriteMemArr(aica_reg,reg,data,sz); - timers[2].RegisterWrite(); - break; - - default: - WriteMemArr(aica_reg,reg,data,sz); - break; - } -} - - - -template void WriteAicaReg<1>(u32 reg,u32 data); -template void WriteAicaReg<2>(u32 reg,u32 data); - -struct AICA_impl : AICA { - - s32 Init() - { - aica_init_mem(); - aica_mmio_Init(); - - verify(sizeof(*CommonData) == 0x508); - verify(sizeof(*DSPData) == 0x15C8); - - CommonData = (CommonData_struct*)&aica_reg[0x2800]; - DSPData = (DSPData_struct*)&aica_reg[0x3000]; - //slave cpu (arm7) - - SCIEB = (InterruptInfo*)&aica_reg[0x289C]; - SCIPD = (InterruptInfo*)&aica_reg[0x289C + 4]; - SCIRE = (InterruptInfo*)&aica_reg[0x289C + 8]; - //Main cpu (sh4) - MCIEB = (InterruptInfo*)&aica_reg[0x28B4]; - MCIPD = 
(InterruptInfo*)&aica_reg[0x28B4 + 4]; - MCIRE = (InterruptInfo*)&aica_reg[0x28B4 + 8]; - - sgc_Init(); - for (int i = 0; i < 3; i++) - timers[i].Init(aica_reg, i); - - return rv_ok; - } - - void Reset(bool manual) - { - if (!manual) - aica_init_mem(); - sgc_Init(); - aica_mmio_Reset(manual); - } - - void Term() - { - sgc_Term(); - } - - //Mainloop - void Update(u32 Samples) - { - AICA_Sample32(); - } - - //Aica reads (both sh4&arm) - u32 ReadReg(u32 addr, u32 size) { - return libAICA_ReadReg(addr, size); - } - - void WriteReg(u32 addr, u32 data, u32 size) { - libAICA_WriteReg(addr, data, size); - } -}; - -AICA* AICA::Create() { - return new AICA_impl(); -} diff --git a/libswirl/hw/aica/aica_mem.cpp b/libswirl/hw/aica/aica_mem.cpp deleted file mode 100644 index e6d5a8bffe..0000000000 --- a/libswirl/hw/aica/aica_mem.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "aica_mem.h" - -//Map using _vmem .. yay -void aica_init_mem() -{ - memset(aica_reg,0,sizeof(aica_reg)); - aica_ram.data[ARAM_SIZE-1]=1; - aica_ram.Zero(); -} -//kill mem map & free used mem ;) -void aica_term_mem() -{ - -} - diff --git a/libswirl/hw/aica/aica_mem.h b/libswirl/hw/aica/aica_mem.h index 789b67e1e3..fcfe00d835 100644 --- a/libswirl/hw/aica/aica_mem.h +++ b/libswirl/hw/aica/aica_mem.h @@ -1,12 +1,2 @@ #pragma once #include "aica.h" - -extern VLockedMemory aica_ram; - -extern void aica_init_mem(); -extern void aica_term_mem(); - -extern u8 aica_reg[0x8000]; - -#define AICA_RAM_SIZE (ARAM_SIZE) -#define AICA_RAM_MASK (ARAM_MASK) \ No newline at end of file diff --git a/libswirl/hw/aica/aica_mmio.cpp b/libswirl/hw/aica/aica_mmio.cpp index fcb545c52f..f2af06345b 100644 --- a/libswirl/hw/aica/aica_mmio.cpp +++ b/libswirl/hw/aica/aica_mmio.cpp @@ -3,6 +3,95 @@ Handles RTC, Display mode reg && arm reset reg ! 
arm7 is handled on a separate arm plugin now */ +#include "aica.h" +#include "sgc_if.h" +#include "aica_mmio.h" +#include "aica_mem.h" +#include +#include "hw/holly/holly_intc.h" +#include "hw/holly/sb.h" +#include "hw/arm7/SoundCPU.h" +#include "hw/arm7/arm7.h" + +#define SH4_IRQ_BIT (1<<(holly_SPU_IRQ&255)) + +CommonData_struct* CommonData; +DSPData_struct* DSPData; +InterruptInfo* MCIEB; +InterruptInfo* MCIPD; +InterruptInfo* MCIRE; +InterruptInfo* SCIEB; +InterruptInfo* SCIPD; +InterruptInfo* SCIRE; + +//Interrupts +//arm side +u32 GetL(u32 witch) +{ + if (witch > 7) + witch = 7; //higher bits share bit 7 + + u32 bit = 1 << witch; + u32 rv = 0; + + if (CommonData->SCILV0 & bit) + rv = 1; + + if (CommonData->SCILV1 & bit) + rv |= 2; + + if (CommonData->SCILV2 & bit) + rv |= 4; + + return rv; +} +void update_arm_interrupts() +{ + u32 p_ints = SCIEB->full & SCIPD->full; + + u32 Lval = 0; + if (p_ints) + { + u32 bit_value = 1;//first bit + //scan all interrupts , lo to hi bit.I assume low bit ints have higher priority over others + for (u32 i = 0; i < 11; i++) + { + if (p_ints & bit_value) + { + //for the first one , Set the L reg & exit + Lval = GetL(i); + break; + } + bit_value <<= 1; //next bit + } + } + + libARM_InterruptChange(p_ints, Lval); +} + +//sh4 side +void UpdateSh4Ints() +{ + u32 p_ints = MCIEB->full & MCIPD->full; + if (p_ints) + { + if ((SB_ISTEXT & SH4_IRQ_BIT) == 0) + { + //if no interrupt is already pending then raise one :) + asic_RaiseInterrupt(holly_SPU_IRQ); + } + } + else + { + if (SB_ISTEXT & SH4_IRQ_BIT) + { + asic_CancelInterrupt(holly_SPU_IRQ); + } + } + +} + + #include "hw/sh4/sh4_mmio.h" #include "types.h" @@ -18,216 +107,306 @@ #include "libswirl.h" #include -VLockedMemory aica_ram; -u32 VREG;//video reg =P -u32 ARMRST;//arm reset reg -u32 rtc_EN=0; -int dma_sched_id; -u32 RealTimeClock; +struct AICARTC_impl : MMIODevice +{ + int rtc_schid = -1; + u32 rtc_EN = 0; + u32 RealTimeClock; + u32 GetRTC_now() + { + // The Dreamcast Epoch 
time is 1/1/50 00:00 but without support for time zone or DST. + // We compute the TZ/DST current time offset and add it to the result + // as if we were in the UTC time zone (as well as the DC Epoch) + time_t rawtime = time(NULL); + struct tm localtm, gmtm; + localtm = *localtime(&rawtime); + gmtm = *gmtime(&rawtime); + gmtm.tm_isdst = -1; + time_t time_offset = mktime(&localtm) - mktime(&gmtm); + // 1/1/50 to 1/1/70 is 20 years and 5 leap days + return (20 * 365 + 5) * 24 * 60 * 60 + rawtime + time_offset; + } -u8 aica_reg[0x8000]; + bool Init() { + rtc_schid = sh4_sched_register(this, 0, STATIC_FORWARD(AICARTC_impl, Update)); + sh4_sched_request(rtc_schid, SH4_MAIN_CLOCK); -//00000000~007FFFFF @DRAM_AREA* -//00800000~008027FF @CHANNEL_DATA -//00802800~00802FFF @COMMON_DATA -//00803000~00807FFF @DSP_DATA -template -u32 ReadReg(u32 addr) -{ - if (addr < 0x2800) - { - ReadMemArrRet(aica_reg, addr, sz); + return true; } - if (addr < 0x2818) - { - if (sz == 1) - { - ReadCommonReg(addr, true); - ReadMemArrRet(aica_reg, addr, 1); - } - else - { - ReadCommonReg(addr, false); - //ReadCommonReg8(addr+1); - ReadMemArrRet(aica_reg, addr, 2); - } + + void Reset(bool m) { + RealTimeClock = GetRTC_now(); } - ReadMemArrRet(aica_reg, addr, sz); -} -template -void WriteReg(u32 addr, u32 data) -{ - if (addr < 0x2000) + u32 Read(u32 addr, u32 sz) { - //Channel data - u32 chan = addr >> 7; - u32 reg = addr & 0x7F; - if (sz == 1) + switch (addr & 0xFF) { - WriteMemArr(aica_reg, addr, data, 1); - WriteChannelReg8(chan, reg); + case 0: + return RealTimeClock >> 16; + case 4: + return RealTimeClock & 0xFFFF; + case 8: + return 0; } - else - { - WriteMemArr(aica_reg, addr, data, 2); - WriteChannelReg8(chan, reg); - WriteChannelReg8(chan, reg + 1); - } - return; + + printf("ReadMem_aica_rtc : invalid address\n"); + return 0; } - if (addr < 0x2800) + void Write(u32 addr, u32 data, u32 sz) { - if (sz == 1) - { - WriteMemArr(aica_reg, addr, data, 1); - } - else + switch (addr & 0xFF) { - 
WriteMemArr(aica_reg, addr, data, 2); + case 0: + if (rtc_EN) + { + RealTimeClock &= 0xFFFF; + RealTimeClock |= (data & 0xFFFF) << 16; + rtc_EN = 0; + } + return; + case 4: + if (rtc_EN) + { + RealTimeClock &= 0xFFFF0000; + RealTimeClock |= data & 0xFFFF; + //TODO: Clean the internal timer ? + } + return; + case 8: + rtc_EN = data & 1; + return; } + return; } - if (addr < 0x2818) + int Update(int tag, int c, int j) { - if (sz == 1) - { - WriteCommonReg8(addr, data); - } - else - { - WriteCommonReg8(addr, data & 0xFF); - WriteCommonReg8(addr + 1, data >> 8); - } - return; + RealTimeClock++; + + return SH4_MAIN_CLOCK; } - if (addr >= 0x3000) - { - if (sz == 1) - { - WriteMemArr(aica_reg, addr, data, 1); - dsp_writenmem(addr); - } - else - { - WriteMemArr(aica_reg, addr, data, 2); - dsp_writenmem(addr); - dsp_writenmem(addr + 1); - } + + void serialize(void** data, unsigned int* total_size) { + REICAST_S(rtc_EN); } - if (sz == 1) - WriteAicaReg<1>(addr, data); - else - WriteAicaReg<2>(addr, data); -} -//Aica reads (both sh4&arm) -u32 libAICA_ReadReg(u32 addr, u32 size) -{ - if (size == 1) - return ReadReg<1>(addr & 0x7FFF); - else - return ReadReg<2>(addr & 0x7FFF); - //must never come here - return 0; + void unserialize(void** data, unsigned int* total_size) { + REICAST_US(rtc_EN); + } +}; + +MMIODevice* Create_RTCDevice() { + return new AICARTC_impl(); } -void libAICA_WriteReg(u32 addr, u32 data, u32 size) -{ - if (size == 1) - WriteReg<1>(addr & 0x7FFF, data); - else - WriteReg<2>(addr & 0x7FFF, data); +u32 libAICA_GetRTC_now() { + return sh4_cpu->GetA0H(A0H_RTC)->GetRTC_now(); } +struct AicaDevice final : AICA { + u32 VREG;//video reg =P + u32 ARMRST;//arm reset reg -u32 GetRTC_now() -{ - // The Dreamcast Epoch time is 1/1/50 00:00 but without support for time zone or DST. 
- // We compute the TZ/DST current time offset and add it to the result - // as if we were in the UTC time zone (as well as the DC Epoch) - time_t rawtime = time(NULL); - struct tm localtm, gmtm; - localtm = *localtime(&rawtime); - gmtm = *gmtime(&rawtime); - gmtm.tm_isdst = -1; - time_t time_offset = mktime(&localtm) - mktime(&gmtm); - // 1/1/50 to 1/1/70 is 20 years and 5 leap days - return (20 * 365 + 5) * 24 * 60 * 60 + rawtime + time_offset; -} + int dma_sched_id; -u32 ReadMem_aica_rtc(u32 addr,u32 sz) -{ - switch( addr & 0xFF ) + AicaTimer timers[3]; + + u8 aica_reg[0x8000]; + + + //Memory i/o + template + void WriteAicaReg(u32 reg, u32 data) { - case 0: - return RealTimeClock>>16; - case 4: - return RealTimeClock &0xFFFF; - case 8: - return 0; + switch (reg) + { + case SCIPD_addr: + verify(sz != 1); + if (data & (1 << 5)) + { + SCIPD->SCPU = 1; + update_arm_interrupts(); + } + //Read only + return; + + case SCIRE_addr: + { + verify(sz != 1); + SCIPD->full &= ~(data /*& SCIEB->full*/); //is the & SCIEB->full needed ? 
doesn't seem like it + data = 0;//Write only + update_arm_interrupts(); + } + break; + + case MCIPD_addr: + if (data & (1 << 5)) + { + verify(sz != 1); + MCIPD->SCPU = 1; + UpdateSh4Ints(); + } + //Read only + return; + + case MCIRE_addr: + { + verify(sz != 1); + MCIPD->full &= ~data; + UpdateSh4Ints(); + //Write only + } + break; + + case TIMER_A: + WriteMemArr(aica_reg, reg, data, sz); + timers[0].RegisterWrite(); + break; + + case TIMER_B: + WriteMemArr(aica_reg, reg, data, sz); + timers[1].RegisterWrite(); + break; + + case TIMER_C: + WriteMemArr(aica_reg, reg, data, sz); + timers[2].RegisterWrite(); + break; + + default: + WriteMemArr(aica_reg, reg, data, sz); + break; + } } - printf("ReadMem_aica_rtc : invalid address\n"); - return 0; -} -void WriteMem_aica_rtc(u32 addr,u32 data,u32 sz) -{ - switch( addr & 0xFF ) + //00000000~007FFFFF @DRAM_AREA* + //00800000~008027FF @CHANNEL_DATA + //00802800~00802FFF @COMMON_DATA + //00803000~00807FFF @DSP_DATA + template + u32 ReadReg_internal(u32 addr) { - case 0: - if (rtc_EN) + if (addr < 0x2800) { - RealTimeClock&=0xFFFF; - RealTimeClock|=(data&0xFFFF)<<16; - rtc_EN=0; + ReadMemArrRet(aica_reg, addr, sz); } - return; - case 4: - if (rtc_EN) + if (addr < 0x2818) { - RealTimeClock&=0xFFFF0000; - RealTimeClock|= data&0xFFFF; - //TODO: Clean the internal timer ? 
+ if (sz == 1) + { + ReadCommonReg(addr, true); + ReadMemArrRet(aica_reg, addr, 1); + } + else + { + ReadCommonReg(addr, false); + //ReadCommonReg8(addr+1); + ReadMemArrRet(aica_reg, addr, 2); + } } - return; - case 8: - rtc_EN=data&1; - return; - } - return; -} + ReadMemArrRet(aica_reg, addr, sz); + } + template + void WriteReg_internal(u32 addr, u32 data) + { + if (addr < 0x2000) + { + //Channel data + u32 chan = addr >> 7; + u32 reg = addr & 0x7F; + if (sz == 1) + { + WriteMemArr(aica_reg, addr, data, 1); + WriteChannelReg8(chan, reg); + } + else + { + WriteMemArr(aica_reg, addr, data, 2); + WriteChannelReg8(chan, reg); + WriteChannelReg8(chan, reg + 1); + } + return; + } -void ArmSetRST() -{ - ARMRST&=1; - g_SoundCPU->SetResetState(ARMRST); -} + if (addr < 0x2800) + { + if (sz == 1) + { + WriteMemArr(aica_reg, addr, data, 1); + } + else + { + WriteMemArr(aica_reg, addr, data, 2); + } + return; + } -//Init/res/term -void aica_mmio_Init() -{ - RealTimeClock = GetRTC_now(); -} + if (addr < 0x2818) + { + if (sz == 1) + { + WriteCommonReg8(addr, data); + } + else + { + WriteCommonReg8(addr, data & 0xFF); + WriteCommonReg8(addr + 1, data >> 8); + } + return; + } -void aica_mmio_Reset(bool Manual) -{ - aica_mmio_Init(); -} + if (addr >= 0x3000) + { + if (sz == 1) + { + WriteMemArr(aica_reg, addr, data, 1); + dsp->WritenMem(addr); + } + else + { + WriteMemArr(aica_reg, addr, data, 2); + dsp->WritenMem(addr); + dsp->WritenMem(addr + 1); + } + } + if (sz == 1) + WriteAicaReg<1>(addr, data); + else + WriteAicaReg<2>(addr, data); + } -void aica_mmio_Term() -{ + void ArmSetRST() + { + ARMRST &= 1; + libARM_SetResetState(ARMRST); + } -} + //Aica reads (both sh4&arm) + u32 AICA_ReadReg(u32 addr, u32 size) + { + if (size == 1) + return ReadReg_internal<1>(addr & 0x7FFF); + else + return ReadReg_internal<2>(addr & 0x7FFF); + //must never come here + return 0; + } -struct AicaDevice : MMIODevice { + void AICA_WriteReg(u32 addr, u32 data, u32 size) + { + if (size == 1) + 
WriteReg_internal<1>(addr & 0x7FFF, data); + else + WriteReg_internal<2>(addr & 0x7FFF, data); + } int dma_end_sched(int tag, int cycl, int jitt) @@ -408,8 +587,11 @@ struct AicaDevice : MMIODevice { SystemBus* sb; ASIC* asic; + DSP* dsp; + u8* aica_ram; + u32 aram_size; - AicaDevice(SystemBus* sb, ASIC* asic) : sb(sb), asic(asic) { } + AicaDevice(SystemBus* sb, ASIC* asic, DSP* dsp, u8* aica_ram, u32 aram_size) : sb(sb), asic(asic), dsp(dsp), aica_ram(aica_ram), aram_size(aram_size) { } u32 Read(u32 addr, u32 sz) { addr &= 0x7FFF; @@ -425,7 +607,7 @@ struct AicaDevice : MMIODevice { } else { - return g_AICA->ReadReg(addr, sz); + return ReadReg(addr, sz); } } else @@ -436,7 +618,7 @@ struct AicaDevice : MMIODevice { } else { - return g_AICA->ReadReg(addr, sz); + return ReadReg(addr, sz); } } } @@ -458,7 +640,7 @@ struct AicaDevice : MMIODevice { } else { - g_AICA->WriteReg(addr, data, sz); + WriteReg(addr, data, sz); } } else @@ -472,13 +654,33 @@ struct AicaDevice : MMIODevice { } else { - g_AICA->WriteReg(addr, data, sz); + WriteReg(addr, data, sz); } } } bool Init() - { + { + verify(sizeof(*CommonData) == 0x508); + verify(sizeof(*DSPData) == 0x15C8); + + CommonData = (CommonData_struct*)&aica_reg[0x2800]; + DSPData = (DSPData_struct*)&aica_reg[0x3000]; + //slave cpu (arm7) + + SCIEB = (InterruptInfo*)&aica_reg[0x289C]; + SCIPD = (InterruptInfo*)&aica_reg[0x289C + 4]; + SCIRE = (InterruptInfo*)&aica_reg[0x289C + 8]; + //Main cpu (sh4) + MCIEB = (InterruptInfo*)&aica_reg[0x28B4]; + MCIPD = (InterruptInfo*)&aica_reg[0x28B4 + 4]; + MCIRE = (InterruptInfo*)&aica_reg[0x28B4 + 8]; + + sgc_Init(aica_reg, aica_ram, aram_size); + + for (int i = 0; i < 3; i++) + timers[i].Init(aica_reg, i); + //NRM //6 sb->RegisterRIO(this, SB_ADST_addr, RIO_WF, 0, STATIC_FORWARD(AicaDevice, Write_SB_ADST)); @@ -497,14 +699,90 @@ struct AicaDevice : MMIODevice { void Reset(bool Manual) { + memset(aica_reg, 0, sizeof(aica_reg)); + + ARMRST = 0; + VREG = 0; + + ArmSetRST(); } void Term() { + 
sgc_Term(); + } + + //Mainloop + void Update(u32 Samples) + { + AICA_Sample32(); + } + + //Aica reads (both sh4&arm) + u32 ReadReg(u32 addr, u32 size) { + return AICA_ReadReg(addr, size); + } + + void WriteReg(u32 addr, u32 data, u32 size) { + AICA_WriteReg(addr, data, size); + } + + + void TimeStep() + { + for (int i = 0; i < 3; i++) + timers[i].StepTimer(1); + + SCIPD->SAMPLE_DONE = 1; + + if (settings.aica.NoBatch) + AICA_Sample(); + + //Make sure sh4/arm interrupt system is up to date :) + update_arm_interrupts(); + UpdateSh4Ints(); + } + + void serialize(void** data, unsigned int* total_size) { + for (int i = 0; i < 3; i++) + { + REICAST_S(timers[i].c_step); + REICAST_S(timers[i].m_step); + } + + REICAST_S(VREG); + REICAST_S(ARMRST); + + REICAST_SA(aica_reg, 0x8000); + } + + void unserialize(void** data, unsigned int* total_size) { + for (int i = 0; i < 3; i++) + { + REICAST_US(timers[i].c_step); + REICAST_US(timers[i].m_step); + } + + REICAST_US(VREG); + REICAST_US(ARMRST); + + REICAST_USA(aica_reg, 0x8000); } }; -MMIODevice* Create_AicaDevice(SystemBus* sb, ASIC* asic) { - return new AicaDevice(sb, asic); +AICA* Create_AicaDevice(SystemBus* sb, ASIC* asic, DSP* dsp, u8* aica_ram, u32 aram_size) { + return new AicaDevice(sb, asic, dsp, aica_ram, aram_size); +} + +u32 libAICA_ReadReg(u32 addr, u32 sz) { + return sh4_cpu->GetA0H(A0H_AICA)->ReadReg(addr, sz); +} + +void libAICA_WriteReg(u32 addr, u32 data, u32 sz) { + sh4_cpu->GetA0H(A0H_AICA)->WriteReg(addr, data, sz); +} + +void libAICA_TimeStep() { + sh4_cpu->GetA0H(A0H_AICA)->TimeStep(); } \ No newline at end of file diff --git a/libswirl/hw/aica/aica_mmio.h b/libswirl/hw/aica/aica_mmio.h index 03d6eb8e56..84b3830af1 100644 --- a/libswirl/hw/aica/aica_mmio.h +++ b/libswirl/hw/aica/aica_mmio.h @@ -1,19 +1,26 @@ #pragma once #include "types.h" #include "hw/holly/sb.h" +#include "hw/sh4/sh4_mmio.h" -extern u32 VREG; -extern u32 RealTimeClock; +struct AICA : MMIODevice { + //Mainloop + virtual void Update(u32 
Samples) = 0; -u32 ReadMem_aica_rtc(u32 addr,u32 sz); -void WriteMem_aica_rtc(u32 addr,u32 data,u32 sz); + //Aica reads (both sh4&arm) + virtual u32 ReadReg(u32 addr, u32 size) = 0; + virtual void WriteReg(u32 addr, u32 data, u32 size) = 0; -// Used for dreamcast second setup -void aica_mmio_Init(); -void aica_mmio_Reset(bool Manual); -void aica_mmio_Term(); + virtual void TimeStep() = 0; +}; +struct ASIC; +struct DSP; -// used internally by aica.cpp -u32 libAICA_ReadReg(u32 addr, u32 size); -void libAICA_WriteReg(u32 addr, u32 data, u32 size); \ No newline at end of file +AICA* Create_AicaDevice(SystemBus* sb, ASIC* asic, DSP* dsp, u8* aica_ram, u32 aram_size); + +MMIODevice* Create_RTCDevice(); + +void libAICA_TimeStep(); + +u32 libAICA_GetRTC_now(); \ No newline at end of file diff --git a/libswirl/hw/aica/dsp.cpp b/libswirl/hw/aica/dsp.cpp new file mode 100644 index 0000000000..b89a4d59e9 --- /dev/null +++ b/libswirl/hw/aica/dsp.cpp @@ -0,0 +1,161 @@ +#include "dsp.h" +#include "dsp_backend.h" +#include "aica_mem.h" +#include "oslib/oslib.h" +#include + +DECL_ALIGN(4096) dsp_context_t dsp; + +//float format is ? 
+u16 DYNACALL DSPBackend::PACK(s32 val) +{ + u32 temp; + int sign, exponent, k; + + sign = (val >> 23) & 0x1; + temp = (val ^ (val << 1)) & 0xFFFFFF; + exponent = 0; + for (k = 0; k < 12; k++) + { + if (temp & 0x800000) + break; + temp <<= 1; + exponent += 1; + } + if (exponent < 12) + val = (val << exponent) & 0x3FFFFF; + else + val <<= 11; + val >>= 11; + val |= sign << 15; + val |= exponent << 11; + + return (u16)val; +} + +s32 DYNACALL DSPBackend::UNPACK(u16 val) +{ + int sign, exponent, mantissa; + s32 uval; + + sign = (val >> 15) & 0x1; + exponent = (val >> 11) & 0xF; + mantissa = val & 0x7FF; + uval = mantissa << 11; + if (exponent > 11) + exponent = 11; + else + uval |= (sign ^ 1) << 22; + uval |= sign << 23; + uval <<= 8; + uval >>= 8; + uval >>= exponent; + + return uval; +} + +void DSPBackend::DecodeInst(u32* IPtr, _INST* i) +{ + i->TRA = (IPtr[0] >> 9) & 0x7F; + i->TWT = (IPtr[0] >> 8) & 0x01; + i->TWA = (IPtr[0] >> 1) & 0x7F; + + i->XSEL = (IPtr[1] >> 15) & 0x01; + i->YSEL = (IPtr[1] >> 13) & 0x03; + i->IRA = (IPtr[1] >> 7) & 0x3F; + i->IWT = (IPtr[1] >> 6) & 0x01; + i->IWA = (IPtr[1] >> 1) & 0x1F; + + i->TABLE = (IPtr[2] >> 15) & 0x01; + i->MWT = (IPtr[2] >> 14) & 0x01; + i->MRD = (IPtr[2] >> 13) & 0x01; + i->EWT = (IPtr[2] >> 12) & 0x01; + i->EWA = (IPtr[2] >> 8) & 0x0F; + i->ADRL = (IPtr[2] >> 7) & 0x01; + i->FRCL = (IPtr[2] >> 6) & 0x01; + i->SHIFT = (IPtr[2] >> 4) & 0x03; + i->YRL = (IPtr[2] >> 3) & 0x01; + i->NEGB = (IPtr[2] >> 2) & 0x01; + i->ZERO = (IPtr[2] >> 1) & 0x01; + i->BSEL = (IPtr[2] >> 0) & 0x01; + + i->NOFL = (IPtr[3] >> 15) & 1; //???? + //i->COEF=(IPtr[3]>>9)&0x3f; + + i->MASA = (IPtr[3] >> 9) & 0x3f; //??? 
+ i->ADREB = (IPtr[3] >> 8) & 0x1; + i->NXADR = (IPtr[3] >> 7) & 0x1; +} + +struct DSP_impl final : DSP { + u8* aica_ram; + u32 aram_size; + + unique_ptr backend; + + DSP_impl(u8* aica_ram, u32 aram_size) : aica_ram(aica_ram), aram_size(aram_size) { + setBackend(DSPBE_INTERPRETER); + } + + bool Init() { + // XX is this the right place for this? + memset(DSPData, 0, sizeof(*DSPData)); + + memset(&dsp, 0, sizeof(dsp)); + dsp.RBL = 0x8000 - 1; + dsp.Stopped = 1; + dsp.regs.MDEC_CT = 1; + dsp.dyndirty = true; + + + return true; + } + + void WritenMem(u32 addr) + { + if (addr >= 0x3400 && addr < 0x3C00) + { + dsp.dyndirty = true; + } + else if (addr >= 0x4000 && addr < 0x4400) + { + // TODO proper sharing of memory with sh4 through DSPData + memset(dsp.TEMP, 0, sizeof(dsp.TEMP)); + } + else if (addr >= 0x4400 && addr < 0x4500) + { + // TODO proper sharing of memory with sh4 through DSPData + memset(dsp.MEMS, 0, sizeof(dsp.MEMS)); + } + } + + void Step() { + if (dsp.dyndirty) { + backend->Recompile(); + dsp.dyndirty = false; + } + + backend->Step(); + } + + bool setBackend(DspBackends type) { + dsp.dyndirty = true; + + if (type == DSPBE_INTERPRETER) { + backend.reset(DSPBackend::CreateInterpreter(aica_ram, aram_size)); + return true; + } +#if FEAT_DSPREC == DYNAREC_JIT + else if (type == DSPBE_DYNAREC) { + backend.reset(DSPBackend::CreateJIT(aica_ram, aram_size)); + return true; + } +#endif + + return false; + } +}; + +DSP* DSP::Create(u8* aica_ram, u32 aram_size) { + return new DSP_impl(aica_ram, aram_size); +} \ No newline at end of file diff --git a/libswirl/hw/aica/dsp.h b/libswirl/hw/aica/dsp.h index 6d5b76c918..04ba03be10 100644 --- a/libswirl/hw/aica/dsp.h +++ b/libswirl/hw/aica/dsp.h @@ -1,129 +1,20 @@ #pragma once -#include "aica.h" +#include "hw/sh4/sh4_mmio.h" -struct dsp_t -{ - //Dynarec - u8 DynCode[4096*8]; //32 kb, 8 pages - - //buffered DSP state - //24 bit wide - s32 TEMP[128]; - //24 bit wide - s32 MEMS[32]; - //20 bit wide - s32 MIXS[16]; - - //RBL/RBP 
(decoded) - u32 RBP; - u32 RBL; - - struct - { - bool MAD_OUT; - bool MEM_ADDR; - bool MEM_RD_DATA; - bool MEM_WT_DATA; - bool FRC_REG; - bool ADRS_REG; - bool Y_REG; - - bool MDEC_CT; - bool MWT_1; - bool MRD_1; - //bool MADRS; - bool MEMS; - bool NOFL_1; - bool NOFL_2; - - bool TEMPS; - bool EFREG; - }regs_init; - - //s32 -> stored as signed extended to 32 bits - struct - { - s32 MAD_OUT; - s32 MEM_ADDR; - s32 MEM_RD_DATA; - s32 MEM_WT_DATA; - s32 FRC_REG; - s32 ADRS_REG; - s32 Y_REG; - - u32 MDEC_CT; - u32 MWT_1; - u32 MRD_1; - u32 MADRS; - u32 NOFL_1; - u32 NOFL_2; - }regs; - //DEC counter :) - //u32 DEC; - - //various dsp regs - signed int ACC; //26 bit - signed int SHIFTED; //24 bit - signed int B; //26 bit - signed int MEMVAL[4]; - signed int FRC_REG; //13 bit - signed int Y_REG; //24 bit - unsigned int ADDR; - unsigned int ADRS_REG; //13 bit - - //Direct Mapped data : - //COEF *128 - //MADRS *64 - //MPRO(dsp code) *4 *128 - //EFREG *16 - //EXTS *2 - - // Interpreter flags - bool Stopped; - - //Dynarec flags - bool dyndirty; +enum DspBackends { + DSPBE_INTERPRETER, + DSPBE_DYNAREC, }; -DECL_ALIGN(4096) -extern dsp_t dsp; - -void dsp_init(); -void dsp_term(); -void dsp_step(); -void dsp_writenmem(u32 addr); - -struct _INST -{ - unsigned int TRA; - unsigned int TWT; - unsigned int TWA; +struct DSP : MMIODevice { + virtual void Step() = 0; + virtual void WritenMem(u32 addr) = 0; + virtual bool setBackend(DspBackends backend) = 0; - unsigned int XSEL; - unsigned int YSEL; - unsigned int IRA; - unsigned int IWT; - unsigned int IWA; - - unsigned int EWT; - unsigned int EWA; - unsigned int ADRL; - unsigned int FRCL; - unsigned int SHIFT; - unsigned int YRL; - unsigned int NEGB; - unsigned int ZERO; - unsigned int BSEL; - - unsigned int NOFL; //MRQ set - unsigned int TABLE; //MRQ set - unsigned int MWT; //MRQ set - unsigned int MRD; //MRQ set - unsigned int MASA; //MRQ set - unsigned int ADREB; //MRQ set - unsigned int NXADR; //MRQ set + static DSP* Create(u8* 
aica_ram, u32 aram_size); }; -void DecodeInst(u32 *IPtr,_INST *i); -u16 DYNACALL PACK(s32 val); -s32 DYNACALL UNPACK(u16 val); +static void libDSP_Step() { + sh4_cpu->GetA0H(A0H_DSP)->Step(); +} + diff --git a/libswirl/hw/aica/dsp_arm64.cpp b/libswirl/hw/aica/dsp_arm64.cpp index 81a24dc030..9d9a69b4fe 100644 --- a/libswirl/hw/aica/dsp_arm64.cpp +++ b/libswirl/hw/aica/dsp_arm64.cpp @@ -22,7 +22,7 @@ #if HOST_CPU == CPU_ARM64 && FEAT_DSPREC != DYNAREC_NONE #include -#include "dsp.h" +#include "dsp_backend.h" #include "hw/aica/aica_mem.h" #include "deps/vixl/aarch64/macro-assembler-aarch64.h" using namespace vixl::aarch64; @@ -34,7 +34,7 @@ class DSPAssembler : public MacroAssembler public: DSPAssembler(u8 *code_buffer, size_t size) : MacroAssembler(code_buffer, size), aica_ram_lit(NULL) {} - void Compile(struct dsp_t *DSP) + void Compile(u8* aica_ram, u32 aram_size, dsp_context_t *DSP) { this->DSP = DSP; //printf("DSPAssembler::DSPCompile recompiling for arm64 at %p\n", GetBuffer()->GetStartAddress()); @@ -110,7 +110,7 @@ class DSPAssembler : public MacroAssembler { u32 *mpro = &DSPData->MPRO[step * 4]; _INST op; - DecodeInst(mpro, &op); + DSPBackend::DecodeInst(mpro, &op); const u32 COEF = step; if (op.XSEL || op.YRL || (op.ADRL && op.SHIFT != 3)) @@ -315,11 +315,11 @@ class DSPAssembler : public MacroAssembler if (op.MRD) // memory only allowed on odd. 
DoA inserts NOPs on even { //MEMVAL[(step + 2) & 3] = UNPACK(*(u16 *)&aica_ram[ADDR & ARAM_MASK]); - CalculateADDR(ADDR, op, ADRS_REG, MDEC_CT); - Ldr(x1, GetAicaRam()); + CalculateADDR(aram_size, ADDR, op, ADRS_REG, MDEC_CT); + Ldr(x1, GetAicaRam(aica_ram)); MemOperand aram_op(x1, Register::GetXRegFromCode(ADDR.GetCode())); Ldrh(w0, aram_op); - GenCallRuntime(UNPACK); + GenCallRuntime(DSPBackend::UNPACK); Mov(w2, w0); Str(w2, dsp_operand(DSP->MEMVAL, (step + 2) & 3)); } @@ -327,11 +327,11 @@ class DSPAssembler : public MacroAssembler { // *(u16 *)&aica_ram[ADDR & ARAM_MASK] = PACK(SHIFTED); Mov(w0, SHIFTED); - GenCallRuntime(PACK); + GenCallRuntime(DSPBackend::PACK); Mov(w2, w0); - CalculateADDR(ADDR, op, ADRS_REG, MDEC_CT); - Ldr(x1, GetAicaRam()); + CalculateADDR(aram_size, ADDR, op, ADRS_REG, MDEC_CT); + Ldr(x1, GetAicaRam(aica_ram)); MemOperand aram_op(x1, Register::GetXRegFromCode(ADDR.GetCode())); Strh(w2, aram_op); } @@ -395,7 +395,7 @@ class DSPAssembler : public MacroAssembler private: MemOperand dsp_operand(void *data, int index = 0, u32 element_size = 4) { - ptrdiff_t offset = ((u8*)data - (u8*)DSP) - offsetof(dsp_t, TEMP) + index * element_size; + ptrdiff_t offset = ((u8*)data - (u8*)DSP) - offsetof(dsp_context_t, TEMP) + index * element_size; if (offset < 16384) return MemOperand(x28, offset); Mov(x0, offset); @@ -404,7 +404,7 @@ class DSPAssembler : public MacroAssembler MemOperand dsp_operand(void *data, const Register& offset_reg, u32 element_size = 4) { - ptrdiff_t offset = ((u8*)data - (u8*)DSP) - offsetof(dsp_t, TEMP); + ptrdiff_t offset = ((u8*)data - (u8*)DSP) - offsetof(dsp_context_t, TEMP); if (offset == 0) return MemOperand(x28, offset_reg, LSL, element_size == 4 ? 2 : element_size == 2 ? 
1 : 0); @@ -433,7 +433,7 @@ class DSPAssembler : public MacroAssembler Bl(&function_label); } - void CalculateADDR(const Register& ADDR, const _INST& op, const Register& ADRS_REG, const Register& MDEC_CT) + void CalculateADDR(u32 aram_size, const Register& ADDR, const _INST& op, const Register& ADRS_REG, const Register& MDEC_CT) { //u32 ADDR = DSPData->MADRS[op.MASA]; Ldr(ADDR, dspdata_operand(DSPData->MADRS, op.MASA)); @@ -464,15 +464,15 @@ class DSPAssembler : public MacroAssembler // RBP is constant for this program Add(ADDR, ADDR, DSP->RBP); // ADDR & ARAM_MASK - if (ARAM_SIZE == 2*1024*1024) + if (aram_size == 2*1024*1024) Bfc(ADDR, 21, 11); - else if (ARAM_SIZE == 8*1024*1024) + else if (aram_size == 8*1024*1024) Bfc(ADDR, 23, 9); else die("Unsupported ARAM_SIZE"); } - Literal *GetAicaRam() + Literal *GetAicaRam(u8* aica_ram) { if (aica_ram_lit == NULL) aica_ram_lit = new Literal(&aica_ram[0], GetLiteralPool(), RawLiteral::kDeletedOnPoolDestruction); @@ -493,74 +493,46 @@ class DSPAssembler : public MacroAssembler } } - struct dsp_t *DSP; + struct dsp_context_t* DSP; Literal *aica_ram_lit; }; -void dsp_recompile() -{ - dsp.Stopped = true; - for (int i = 127; i >= 0; --i) - { - u32 *IPtr = DSPData->MPRO + i * 4; +struct DSPJITArm64 : DSPBackend { + u8* aica_ram; + u32 aram_size; - if (IPtr[0] != 0 || IPtr[1] != 0 || IPtr[2 ]!= 0 || IPtr[3] != 0) + DSPJITArm64(u8* aica_ram, u32 aram_size) : aica_ram(aica_ram), aram_size(aram_size) { + if (mprotect(dsp.DynCode, sizeof(dsp.DynCode), PROT_EXEC | PROT_READ | PROT_WRITE)) { - dsp.Stopped = false; - break; + perror("Couldn’t mprotect DSP code"); + die("mprotect failed in arm64 dsp"); } } - DSPAssembler assembler(&dsp.DynCode[0], sizeof(dsp.DynCode)); - assembler.Compile(&dsp); -} -void dsp_init() -{ - memset(&dsp, 0, sizeof(dsp)); - dsp.RBL = 0x8000 - 1; - dsp.RBP=0; - dsp.regs.MDEC_CT = 1; - dsp.dyndirty = true; - - if (mprotect(dsp.DynCode, sizeof(dsp.DynCode), PROT_EXEC | PROT_READ | PROT_WRITE)) + void 
Recompile() { - perror("Couldn’t mprotect DSP code"); - die("mprotect failed in arm64 dsp"); - } -} + dsp.Stopped = true; + for (int i = 127; i >= 0; --i) + { + u32* IPtr = DSPData->MPRO + i * 4; -void dsp_step() -{ - if (dsp.dyndirty) - { - dsp.dyndirty = false; - dsp_recompile(); + if (IPtr[0] != 0 || IPtr[1] != 0 || IPtr[2] != 0 || IPtr[3] != 0) + { + dsp.Stopped = false; + break; + } + } + DSPAssembler assembler(&dsp.DynCode[0], sizeof(dsp.DynCode)); + assembler.Compile(aica_ram, aram_size, &dsp); } -#ifdef _ANDROID - ((void (*)())&dsp.DynCode)(); -#endif -} - -void dsp_writenmem(u32 addr) -{ - if (addr >= 0x3400 && addr < 0x3C00) - { - dsp.dyndirty = true; - } - else if (addr >= 0x4000 && addr < 0x4400) - { - // TODO proper sharing of memory with sh4 through DSPData - memset(dsp.TEMP, 0, sizeof(dsp.TEMP)); - } - else if (addr >= 0x4400 && addr < 0x4500) - { - // TODO proper sharing of memory with sh4 through DSPData - memset(dsp.MEMS, 0, sizeof(dsp.MEMS)); - } -} + void Step() + { + ((void (*)())&dsp.DynCode[0])(); + } +}; -void dsp_term() -{ +DSPBackend* DSPBackend::CreateJIT(u8* aica_ram, u32 aram_size) { + return new DSPJITArm64(aica_ram, aram_size); } #endif diff --git a/libswirl/hw/aica/dsp_backend.h b/libswirl/hw/aica/dsp_backend.h new file mode 100644 index 0000000000..65e0eb6cd0 --- /dev/null +++ b/libswirl/hw/aica/dsp_backend.h @@ -0,0 +1,136 @@ +#pragma once +#include "aica.h" + +struct dsp_context_t +{ + //Dynarec + u8 DynCode[4096 * 8]; //32 kb, 8 pages + + //buffered DSP state + //24 bit wide + s32 TEMP[128]; + //24 bit wide + s32 MEMS[32]; + //20 bit wide + s32 MIXS[16]; + + //RBL/RBP (decoded) + u32 RBP; + u32 RBL; + + struct + { + bool MAD_OUT; + bool MEM_ADDR; + bool MEM_RD_DATA; + bool MEM_WT_DATA; + bool FRC_REG; + bool ADRS_REG; + bool Y_REG; + + bool MDEC_CT; + bool MWT_1; + bool MRD_1; + //bool MADRS; + bool MEMS; + bool NOFL_1; + bool NOFL_2; + + bool TEMPS; + bool EFREG; + }regs_init; + + //s32 -> stored as signed extended to 32 bits + 
struct + { + s32 MAD_OUT; + s32 MEM_ADDR; + s32 MEM_RD_DATA; + s32 MEM_WT_DATA; + s32 FRC_REG; + s32 ADRS_REG; + s32 Y_REG; + + u32 MDEC_CT; + u32 MWT_1; + u32 MRD_1; + u32 MADRS; + u32 NOFL_1; + u32 NOFL_2; + }regs; + //DEC counter :) + //u32 DEC; + + //various dsp regs + signed int ACC; //26 bit + signed int SHIFTED; //24 bit + signed int B; //26 bit + signed int MEMVAL[4]; + signed int FRC_REG; //13 bit + signed int Y_REG; //24 bit + unsigned int ADDR; + unsigned int ADRS_REG; //13 bit + + //Direct Mapped data : + //COEF *128 + //MADRS *64 + //MPRO(dsp code) *4 *128 + //EFREG *16 + //EXTS *2 + + // Interpreter flags + bool Stopped; + + //Dynarec flags + bool dyndirty; +}; + +DECL_ALIGN(4096) +extern dsp_context_t dsp; + +struct _INST +{ + unsigned int TRA; + unsigned int TWT; + unsigned int TWA; + + unsigned int XSEL; + unsigned int YSEL; + unsigned int IRA; + unsigned int IWT; + unsigned int IWA; + + unsigned int EWT; + unsigned int EWA; + unsigned int ADRL; + unsigned int FRCL; + unsigned int SHIFT; + unsigned int YRL; + unsigned int NEGB; + unsigned int ZERO; + unsigned int BSEL; + + unsigned int NOFL; //MRQ set + unsigned int TABLE; //MRQ set + unsigned int MWT; //MRQ set + unsigned int MRD; //MRQ set + unsigned int MASA; //MRQ set + unsigned int ADREB; //MRQ set + unsigned int NXADR; //MRQ set +}; + + + +struct DSPBackend { + static u16 DYNACALL PACK(s32 val); + static s32 DYNACALL UNPACK(u16 val); + static void DecodeInst(u32* IPtr, _INST* i); + + virtual void Step() = 0; + virtual void Recompile() = 0; + + virtual ~DSPBackend() { } + + static DSPBackend* CreateInterpreter(u8* aica_ram, u32 aram_size); + static DSPBackend* CreateJIT(u8* aica_ram, u32 aram_size); +}; diff --git a/libswirl/hw/aica/dsp_interp.cpp b/libswirl/hw/aica/dsp_interp.cpp index 82bcab8c63..553f80f3af 100644 --- a/libswirl/hw/aica/dsp_interp.cpp +++ b/libswirl/hw/aica/dsp_interp.cpp @@ -4,354 +4,324 @@ // Copyright (c) 2007-2009 R. Belmont and Richard Bannister, and others. 
// All rights reserved. // -#include "dsp.h" +#include "dsp_backend.h" #include "aica_mem.h" -#if FEAT_DSPREC != DYNAREC_JIT - #ifdef RELEASE #undef verify #define verify(...) #endif -void AICADSP_Init(struct dsp_t *DSP) -{ - memset(DSP, 0, sizeof(*DSP)); - DSP->RBL = 0x8000 - 1; - DSP->Stopped = 1; - dsp.regs.MDEC_CT = 1; -} +struct DSPInterpreter_impl : DSPBackend { + u8* aica_ram; + u32 aram_mask; -void AICADSP_Step(struct dsp_t *DSP) -{ - s32 ACC = 0; //26 bit - s32 SHIFTED = 0; //24 bit - s32 X = 0; //24 bit - s32 Y = 0; //13 bit - s32 B = 0; //26 bit - s32 INPUTS = 0; //24 bit - s32 MEMVAL[4] = {0}; - s32 FRC_REG = 0; //13 bit - s32 Y_REG = 0; //24 bit - u32 ADRS_REG = 0; //13 bit - int step; - - memset(DSPData->EFREG, 0, sizeof(DSPData->EFREG)); - - if (DSP->Stopped) - return; + DSPInterpreter_impl(u8* aica_ram, u32 aram_size) : aica_ram(aica_ram), aram_mask(aram_size - 1) { } + + void AICADSP_Step(struct dsp_context_t* DSP) + { + s32 ACC = 0; //26 bit + s32 SHIFTED = 0; //24 bit + s32 X = 0; //24 bit + s32 Y = 0; //13 bit + s32 B = 0; //26 bit + s32 INPUTS = 0; //24 bit + s32 MEMVAL[4] = { 0 }; + s32 FRC_REG = 0; //13 bit + s32 Y_REG = 0; //24 bit + u32 ADRS_REG = 0; //13 bit + int step; + + memset(DSPData->EFREG, 0, sizeof(DSPData->EFREG)); + + if (DSP->Stopped) + return; #if 0 - int dump = 0; - FILE *f = NULL; - if (dump) - f = fopen("dsp.txt", "wt"); + int dump = 0; + FILE* f = NULL; + if (dump) + f = fopen("dsp.txt", "wt"); #endif - for (step = 0; step < 128; ++step) - { - u32 *IPtr = DSPData->MPRO + step * 4; - - if (IPtr[0] == 0 && IPtr[1] == 0 && IPtr[2] == 0 && IPtr[3] == 0) + for (step = 0; step < 128; ++step) { - // Empty instruction shortcut - X = DSP->TEMP[DSP->regs.MDEC_CT & 0x7F]; - X <<= 8; - X >>= 8; - Y = FRC_REG; - Y <<= 19; - Y >>= 19; - - s64 v = ((s64)X * (s64)Y) >> 10; - v <<= 6; // 26 bits only - v >>= 6; - ACC = v + X; - ACC <<= 6; // 26 bits only - ACC >>= 6; + u32* IPtr = DSPData->MPRO + step * 4; - continue; - } + if (IPtr[0] == 0 
&& IPtr[1] == 0 && IPtr[2] == 0 && IPtr[3] == 0) + { + // Empty instruction shortcut + X = DSP->TEMP[DSP->regs.MDEC_CT & 0x7F]; + X <<= 8; + X >>= 8; + Y = FRC_REG; + Y <<= 19; + Y >>= 19; + + s64 v = ((s64)X * (s64)Y) >> 10; + v <<= 6; // 26 bits only + v >>= 6; + ACC = v + X; + ACC <<= 6; // 26 bits only + ACC >>= 6; + + continue; + } - u32 TRA = (IPtr[0] >> 9) & 0x7F; - u32 TWT = (IPtr[0] >> 8) & 0x01; + u32 TRA = (IPtr[0] >> 9) & 0x7F; + u32 TWT = (IPtr[0] >> 8) & 0x01; - u32 XSEL = (IPtr[1] >> 15) & 0x01; - u32 YSEL = (IPtr[1] >> 13) & 0x03; - u32 IRA = (IPtr[1] >> 7) & 0x3F; - u32 IWT = (IPtr[1] >> 6) & 0x01; + u32 XSEL = (IPtr[1] >> 15) & 0x01; + u32 YSEL = (IPtr[1] >> 13) & 0x03; + u32 IRA = (IPtr[1] >> 7) & 0x3F; + u32 IWT = (IPtr[1] >> 6) & 0x01; - u32 EWT = (IPtr[2] >> 12) & 0x01; - u32 ADRL = (IPtr[2] >> 7) & 0x01; - u32 FRCL = (IPtr[2] >> 6) & 0x01; - u32 SHIFT = (IPtr[2] >> 4) & 0x03; - u32 YRL = (IPtr[2] >> 3) & 0x01; - u32 NEGB = (IPtr[2] >> 2) & 0x01; - u32 ZERO = (IPtr[2] >> 1) & 0x01; - u32 BSEL = (IPtr[2] >> 0) & 0x01; + u32 EWT = (IPtr[2] >> 12) & 0x01; + u32 ADRL = (IPtr[2] >> 7) & 0x01; + u32 FRCL = (IPtr[2] >> 6) & 0x01; + u32 SHIFT = (IPtr[2] >> 4) & 0x03; + u32 YRL = (IPtr[2] >> 3) & 0x01; + u32 NEGB = (IPtr[2] >> 2) & 0x01; + u32 ZERO = (IPtr[2] >> 1) & 0x01; + u32 BSEL = (IPtr[2] >> 0) & 0x01; - u32 COEF = step; + u32 COEF = step; - // operations are done at 24 bit precision + // operations are done at 24 bit precision #if 0 #define DUMP(v) printf(" " #v ": %04X",v); - printf("%d: ",step); - DUMP(ACC); - DUMP(SHIFTED); - DUMP(X); - DUMP(Y); - DUMP(B); - DUMP(INPUTS); - DUMP(MEMVAL); - DUMP(FRC_REG); - DUMP(Y_REG); - DUMP(ADDR); - DUMP(ADRS_REG); - printf("\n"); + printf("%d: ", step); + DUMP(ACC); + DUMP(SHIFTED); + DUMP(X); + DUMP(Y); + DUMP(B); + DUMP(INPUTS); + DUMP(MEMVAL); + DUMP(FRC_REG); + DUMP(Y_REG); + DUMP(ADDR); + DUMP(ADRS_REG); + printf("\n"); #endif - // INPUTS RW - verify(IRA < 0x38); - if (IRA <= 0x1f) - INPUTS = 
DSP->MEMS[IRA]; - else if (IRA <= 0x2F) - INPUTS = DSP->MIXS[IRA - 0x20] << 4; // MIXS is 20 bit - else if (IRA <= 0x31) - INPUTS = DSPData->EXTS[IRA - 0x30] << 8; // EXTS is 16 bits - else - INPUTS = 0; - - INPUTS <<= 8; - INPUTS >>= 8; - - if (IWT) - { - u32 IWA = (IPtr[1] >> 1) & 0x1F; - DSP->MEMS[IWA] = MEMVAL[step & 3]; // MEMVAL was selected in previous MRD - // "When read and write are specified simultaneously in the same step for INPUTS, TEMP, etc., write is executed after read." - //if (IRA == IWA) - // INPUTS = MEMVAL[step & 3]; - } - - // Operand sel - // B - if (!ZERO) - { - if (BSEL) - B = ACC; + // INPUTS RW + verify(IRA < 0x38); + if (IRA <= 0x1f) + INPUTS = DSP->MEMS[IRA]; + else if (IRA <= 0x2F) + INPUTS = DSP->MIXS[IRA - 0x20] << 4; // MIXS is 20 bit + else if (IRA <= 0x31) + INPUTS = DSPData->EXTS[IRA - 0x30] << 8; // EXTS is 16 bits else + INPUTS = 0; + + INPUTS <<= 8; + INPUTS >>= 8; + + if (IWT) { - B = DSP->TEMP[(TRA + DSP->regs.MDEC_CT) & 0x7F]; - B <<= 8; //Sign extend - B >>= 8; + u32 IWA = (IPtr[1] >> 1) & 0x1F; + DSP->MEMS[IWA] = MEMVAL[step & 3]; // MEMVAL was selected in previous MRD + // "When read and write are specified simultaneously in the same step for INPUTS, TEMP, etc., write is executed after read." + //if (IRA == IWA) + // INPUTS = MEMVAL[step & 3]; } - if (NEGB) - B = 0 - B; - } - else - B = 0; - - // X - if (XSEL) - X = INPUTS; - else - { - X = DSP->TEMP[(TRA + DSP->regs.MDEC_CT) & 0x7F]; - X <<= 8; - X >>= 8; - } - // Y - if (YSEL == 0) - Y = FRC_REG; - else if (YSEL == 1) - Y = DSPData->COEF[COEF] >> 3; //COEF is 16 bits - else if (YSEL == 2) - Y = (Y_REG >> 11) & 0x1FFF; - else if (YSEL == 3) - Y = (Y_REG >> 4) & 0x0FFF; - - if (YRL) - Y_REG = INPUTS; - - // Shifter - // There's a 1-step delay at the output of the X*Y + B adder. So we use the ACC value from the previous step. 
- if (SHIFT == 0) - { - SHIFTED = ACC >> 2; // 26 bits -> 24 bits - if (SHIFTED > 0x0007FFFF) - SHIFTED = 0x0007FFFF; - if (SHIFTED < (-0x00080000)) - SHIFTED = -0x00080000; - } - else if (SHIFT == 1) - { - SHIFTED = ACC >> 1; // 26 bits -> 24 bits and x2 scale - if (SHIFTED > 0x0007FFFF) - SHIFTED = 0x0007FFFF; - if (SHIFTED < (-0x00080000)) - SHIFTED = -0x00080000; - } - else if (SHIFT == 2) - { - SHIFTED = ACC >> 1; - SHIFTED <<= 8; - SHIFTED >>= 8; - } - else if (SHIFT == 3) - { - SHIFTED = ACC >> 2; - SHIFTED <<= 8; - SHIFTED >>= 8; - } + // Operand sel + // B + if (!ZERO) + { + if (BSEL) + B = ACC; + else + { + B = DSP->TEMP[(TRA + DSP->regs.MDEC_CT) & 0x7F]; + B <<= 8; //Sign extend + B >>= 8; + } + if (NEGB) + B = 0 - B; + } + else + B = 0; - // ACCUM - Y <<= 19; - Y >>= 19; + // X + if (XSEL) + X = INPUTS; + else + { + X = DSP->TEMP[(TRA + DSP->regs.MDEC_CT) & 0x7F]; + X <<= 8; + X >>= 8; + } - s64 v = ((s64)X * (s64)Y) >> 10; // magic value from dynarec. 1 sign bit + 24-1 bits + 13-1 bits -> 26 bits? - v <<= 6; // 26 bits only - v >>= 6; - ACC = v + B; - ACC <<= 6; // 26 bits only - ACC >>= 6; + // Y + if (YSEL == 0) + Y = FRC_REG; + else if (YSEL == 1) + Y = DSPData->COEF[COEF] >> 3; //COEF is 16 bits + else if (YSEL == 2) + Y = (Y_REG >> 11) & 0x1FFF; + else if (YSEL == 3) + Y = (Y_REG >> 4) & 0x0FFF; + + if (YRL) + Y_REG = INPUTS; + + // Shifter + // There's a 1-step delay at the output of the X*Y + B adder. So we use the ACC value from the previous step. 
+ if (SHIFT == 0) + { + SHIFTED = ACC >> 2; // 26 bits -> 24 bits + if (SHIFTED > 0x0007FFFF) + SHIFTED = 0x0007FFFF; + if (SHIFTED < (-0x00080000)) + SHIFTED = -0x00080000; + } + else if (SHIFT == 1) + { + SHIFTED = ACC >> 1; // 26 bits -> 24 bits and x2 scale + if (SHIFTED > 0x0007FFFF) + SHIFTED = 0x0007FFFF; + if (SHIFTED < (-0x00080000)) + SHIFTED = -0x00080000; + } + else if (SHIFT == 2) + { + SHIFTED = ACC >> 1; + SHIFTED <<= 8; + SHIFTED >>= 8; + } + else if (SHIFT == 3) + { + SHIFTED = ACC >> 2; + SHIFTED <<= 8; + SHIFTED >>= 8; + } - if (TWT) - { - u32 TWA = (IPtr[0] >> 1) & 0x7F; - DSP->TEMP[(TWA + DSP->regs.MDEC_CT) & 0x7F] = SHIFTED; - } + // ACCUM + Y <<= 19; + Y >>= 19; - if (FRCL) - { - if (SHIFT == 3) - FRC_REG = SHIFTED & 0x0FFF; - else - FRC_REG = (SHIFTED >> 11) & 0x1FFF; - } + s64 v = ((s64)X * (s64)Y) >> 10; // magic value from dynarec. 1 sign bit + 24-1 bits + 13-1 bits -> 26 bits? + v <<= 6; // 26 bits only + v >>= 6; + ACC = v + B; + ACC <<= 6; // 26 bits only + ACC >>= 6; - if (step & 1) - { - u32 MWT = (IPtr[2] >> 14) & 0x01; - u32 MRD = (IPtr[2] >> 13) & 0x01; + if (TWT) + { + u32 TWA = (IPtr[0] >> 1) & 0x7F; + DSP->TEMP[(TWA + DSP->regs.MDEC_CT) & 0x7F] = SHIFTED; + } - if (MRD || MWT) + if (FRCL) { - u32 TABLE = (IPtr[2] >> 15) & 0x01; - - u32 NOFL = (IPtr[3] >> 15) & 1; //???? - verify(!NOFL); - u32 MASA = (IPtr[3] >> 9) & 0x3f; //??? - u32 ADREB = (IPtr[3] >> 8) & 0x1; - u32 NXADR = (IPtr[3] >> 7) & 0x1; - - u32 ADDR = DSPData->MADRS[MASA]; - if (ADREB) - ADDR += ADRS_REG & 0x0FFF; - if (NXADR) - ADDR++; - if (!TABLE) - { - ADDR += DSP->regs.MDEC_CT; - ADDR &= DSP->RBL; // RBL is ring buffer length - 1 - } + if (SHIFT == 3) + FRC_REG = SHIFTED & 0x0FFF; else - ADDR &= 0xFFFF; + FRC_REG = (SHIFTED >> 11) & 0x1FFF; + } - ADDR <<= 1; // Word -> byte address - ADDR += DSP->RBP; // RBP is already a byte address - if (MRD) // memory only allowed on odd. 
DoA inserts NOPs on even - { - //if (NOFL) - // MEMVAL[(step + 2) & 3] = (*(s16 *)&aica_ram[ADDR]) << 8; - //else - MEMVAL[(step + 2) & 3] = UNPACK(*(u16 *)&aica_ram[ADDR & ARAM_MASK]); - } - if (MWT) + if (step & 1) + { + u32 MWT = (IPtr[2] >> 14) & 0x01; + u32 MRD = (IPtr[2] >> 13) & 0x01; + + if (MRD || MWT) { - // FIXME We should wait for the next step to copy stuff to SRAM (same as read) - //if (NOFL) - // *(s16 *)&aica_ram[ADDR] = SHIFTED >> 8; - //else - *(u16 *)&aica_ram[ADDR & ARAM_MASK] = PACK(SHIFTED); + u32 TABLE = (IPtr[2] >> 15) & 0x01; + + u32 NOFL = (IPtr[3] >> 15) & 1; //???? + verify(!NOFL); + u32 MASA = (IPtr[3] >> 9) & 0x3f; //??? + u32 ADREB = (IPtr[3] >> 8) & 0x1; + u32 NXADR = (IPtr[3] >> 7) & 0x1; + + u32 ADDR = DSPData->MADRS[MASA]; + if (ADREB) + ADDR += ADRS_REG & 0x0FFF; + if (NXADR) + ADDR++; + if (!TABLE) + { + ADDR += DSP->regs.MDEC_CT; + ADDR &= DSP->RBL; // RBL is ring buffer length - 1 + } + else + ADDR &= 0xFFFF; + + ADDR <<= 1; // Word -> byte address + ADDR += DSP->RBP; // RBP is already a byte address + if (MRD) // memory only allowed on odd. DoA inserts NOPs on even + { + //if (NOFL) + // MEMVAL[(step + 2) & 3] = (*(s16 *)&aica_ram[ADDR]) << 8; + //else + MEMVAL[(step + 2) & 3] = UNPACK(*(u16*)&aica_ram[ADDR & aram_mask]); + } + if (MWT) + { + // FIXME We should wait for the next step to copy stuff to SRAM (same as read) + //if (NOFL) + // *(s16 *)&aica_ram[ADDR] = SHIFTED >> 8; + //else + *(u16*)&aica_ram[ADDR & aram_mask] = PACK(SHIFTED); + } } } - } - if (ADRL) - { - if (SHIFT == 3) - ADRS_REG = (SHIFTED >> 12) & 0xFFF; - else - ADRS_REG = (INPUTS >> 16); - } + if (ADRL) + { + if (SHIFT == 3) + ADRS_REG = (SHIFTED >> 12) & 0xFFF; + else + ADRS_REG = (INPUTS >> 16); + } + + if (EWT) + { + u32 EWA = (IPtr[2] >> 8) & 0x0F; + // 4 ???? + DSPData->EFREG[EWA] += SHIFTED >> 4; // dynarec uses = instead of += + } - if (EWT) - { - u32 EWA = (IPtr[2] >> 8) & 0x0F; - // 4 ???? 
- DSPData->EFREG[EWA] += SHIFTED >> 4; // dynarec uses = instead of += } + --DSP->regs.MDEC_CT; + if (dsp.regs.MDEC_CT == 0) + dsp.regs.MDEC_CT = dsp.RBL + 1; // RBL is ring buffer length - 1 + // memset(DSP->MIXS, 0, sizeof(DSP->MIXS)); + // if(f) + // fclose(f); } - --DSP->regs.MDEC_CT; - if (dsp.regs.MDEC_CT == 0) - dsp.regs.MDEC_CT = dsp.RBL + 1; // RBL is ring buffer length - 1 - -// memset(DSP->MIXS, 0, sizeof(DSP->MIXS)); -// if(f) -// fclose(f); -} -void AICADSP_Start(struct dsp_t *DSP) -{ - dsp.Stopped = 1; - for (int i = 127; i >= 0; --i) + void Recompile () { - u32 *IPtr = DSPData->MPRO + i * 4; - - if (IPtr[0] != 0 || IPtr[1] != 0 || IPtr[2 ]!= 0 || IPtr[3] != 0) + dsp.Stopped = 1; + for (int i = 127; i >= 0; --i) { - DSP->Stopped = 0; - //printf("DSP: starting %d steps\n", i + 1); + u32* IPtr = DSPData->MPRO + i * 4; + + if (IPtr[0] != 0 || IPtr[1] != 0 || IPtr[2] != 0 || IPtr[3] != 0) + { + dsp.Stopped = 0; + //printf("DSP: starting %d steps\n", i + 1); - break; + break; + } } } -} -void dsp_init() -{ - AICADSP_Init(&dsp); - AICADSP_Start(&dsp); -} - -void dsp_term() -{ - dsp.Stopped = 1; -} - -void dsp_step() -{ - AICADSP_Step(&dsp); -} - -void dsp_writenmem(u32 addr) -{ - if (addr >= 0x3400 && addr < 0x3C00) - { - AICADSP_Start(&dsp); - } - else if (addr >= 0x4000 && addr < 0x4400) - { - // TODO proper sharing of memory with sh4 through DSPData - memset(dsp.TEMP, 0, sizeof(dsp.TEMP)); - } - else if (addr >= 0x4400 && addr < 0x4500) + void Step() { - // TODO proper sharing of memory with sh4 through DSPData - memset(dsp.MEMS, 0, sizeof(dsp.MEMS)); + AICADSP_Step(&dsp); } -} +}; -#endif +DSPBackend* DSPBackend::CreateInterpreter(u8* aica_ram, u32 aram_size) { + return new DSPInterpreter_impl(aica_ram, aram_size); +} diff --git a/libswirl/hw/aica/dsp_x86.cpp b/libswirl/hw/aica/dsp_x86.cpp index 79288738f7..2e4825e8a6 100644 --- a/libswirl/hw/aica/dsp_x86.cpp +++ b/libswirl/hw/aica/dsp_x86.cpp @@ -1,4 +1,4 @@ -#include "dsp.h" +#include "dsp_backend.h" 
#include "aica_mem.h" #include "oslib/oslib.h" @@ -19,824 +19,713 @@ See LICENSE & COPYRIGHT files further details */ -DECL_ALIGN(4096) dsp_t dsp; - -//float format is ? -u16 DYNACALL PACK(s32 val) -{ - u32 temp; - int sign,exponent,k; - - sign = (val >> 23) & 0x1; - temp = (val ^ (val << 1)) & 0xFFFFFF; - exponent = 0; - for (k=0; k<12; k++) - { - if (temp & 0x800000) - break; - temp <<= 1; - exponent += 1; - } - if (exponent < 12) - val = (val << exponent) & 0x3FFFFF; - else - val <<= 11; - val >>= 11; - val |= sign << 15; - val |= exponent << 11; - - return (u16)val; -} - -s32 DYNACALL UNPACK(u16 val) -{ - int sign,exponent,mantissa; - s32 uval; - - sign = (val >> 15) & 0x1; - exponent = (val >> 11) & 0xF; - mantissa = val & 0x7FF; - uval = mantissa << 11; - if (exponent > 11) - exponent = 11; - else - uval |= (sign ^ 1) << 22; - uval |= sign << 23; - uval <<= 8; - uval >>= 8; - uval >>= exponent; - - return uval; -} - -void DecodeInst(u32 *IPtr,_INST *i) -{ - i->TRA=(IPtr[0]>>9)&0x7F; - i->TWT=(IPtr[0]>>8)&0x01; - i->TWA=(IPtr[0]>>1)&0x7F; - - i->XSEL=(IPtr[1]>>15)&0x01; - i->YSEL=(IPtr[1]>>13)&0x03; - i->IRA=(IPtr[1]>>7)&0x3F; - i->IWT=(IPtr[1]>>6)&0x01; - i->IWA=(IPtr[1]>>1)&0x1F; - - i->TABLE=(IPtr[2]>>15)&0x01; - i->MWT=(IPtr[2]>>14)&0x01; - i->MRD=(IPtr[2]>>13)&0x01; - i->EWT=(IPtr[2]>>12)&0x01; - i->EWA=(IPtr[2]>>8)&0x0F; - i->ADRL=(IPtr[2]>>7)&0x01; - i->FRCL=(IPtr[2]>>6)&0x01; - i->SHIFT=(IPtr[2]>>4)&0x03; - i->YRL=(IPtr[2]>>3)&0x01; - i->NEGB=(IPtr[2]>>2)&0x01; - i->ZERO=(IPtr[2]>>1)&0x01; - i->BSEL=(IPtr[2]>>0)&0x01; - - i->NOFL=(IPtr[3]>>15)&1; //???? - //i->COEF=(IPtr[3]>>9)&0x3f; - - i->MASA=(IPtr[3]>>9)&0x3f; //??? 
- i->ADREB=(IPtr[3]>>8)&0x1; - i->NXADR=(IPtr[3]>>7)&0x1; -} #if HOST_CPU == CPU_X86 && FEAT_DSPREC == DYNAREC_JIT -#include "emitter/x86_emitter.h" - -const bool SUPPORT_NOFL=false; - - -#define assert verify - -#pragma warning(disable:4311) - -#define DYNBUF 0x10000 -/* -//#define USEFLOATPACK -//pack s24 to s1e4s11 -naked u16 packasm(s32 val) -{ - __asm - { - mov edx,ecx; //eax will be sign - and edx,0x80000; //get the sign - - jz poz; - neg ecx; - - poz: - bsr eax,ecx; - jz _zero; - - //24 -> 11 - //13 -> 0 - //12..0 -> 0 - sub eax,11; - cmovs eax,0; //if <0 -> 0 - - shr ecx,eax; //shift out mantissa as needed (yeah i know, no rounding here and all .. ) - - shr eax,12; //[14:12] is exp - or edx,ecx; //merge [15] | [11:0] - or eax,edx; //merge [14:12] | ([15] | [11:0]), result on eax - ret; - -_zero: - xor eax,eax; - ret; - } -} -//ONLY lower 16 bits are valid, rest are ignored but do movzx to avoid partial stalls :) -naked s32 unpackasm(u32 val) -{ - __asm - { - mov eax,ecx; //get mantissa bits - and ecx,0x7FF; // - - shl eax,11; //get shift factor (shift) - mov edx,eax; //keep a copy for the sign - and eax,0xF; //get shift factor (mask) - - shl ecx,eax; //shift mantissa to normal position - - test edx,0x10; //signed ? 
- jnz _negme; - - ret; //nop, return as is - -_negme: - //yep, negate and return - neg eax; - ret; - - } -}*/ - -void dsp_init() -{ - memset(&dsp,0,sizeof(dsp)); - memset(DSPData,0,sizeof(*DSPData)); - - dsp.dyndirty=true; - dsp.RBL=0x2000-1; - dsp.RBP=0; - dsp.regs.MDEC_CT=1; - - - //os_MakeExecutable(dsp.DynCode,sizeof(dsp.DynCode)); #if HOST_OS == OS_WINDOWS - DWORD old; - VirtualProtect(dsp.DynCode, sizeof(dsp.DynCode), PAGE_EXECUTE_READWRITE, &old); +#include #endif -} -void dsp_recompile(); +#include "jit/emitter/x86/x86_emitter.h" -void* dyna_realloc(void*ptr,u32 oldsize,u32 newsize) -{ - return dsp.DynCode; -} -void _dsp_debug_step_start() -{ - memset(&dsp.regs_init,0,sizeof(dsp.regs_init)); -} -void _dsp_debug_step_end() -{ - verify(dsp.regs_init.MAD_OUT); - verify(dsp.regs_init.MEM_ADDR); - verify(dsp.regs_init.MEM_RD_DATA); - verify(dsp.regs_init.MEM_WT_DATA); - verify(dsp.regs_init.FRC_REG); - verify(dsp.regs_init.ADRS_REG); - verify(dsp.regs_init.Y_REG); - - //verify(dsp.regs_init.MDEC_CT); // -> its done on C - verify(dsp.regs_init.MWT_1); - verify(dsp.regs_init.MRD_1); -// verify(dsp.regs_init.MADRS); //THAT WAS not real, MEM_ADDR is the deal ;p - verify(dsp.regs_init.MEMS); - verify(dsp.regs_init.NOFL_1); - verify(dsp.regs_init.NOFL_2); - verify(dsp.regs_init.TEMPS); - verify(dsp.regs_init.EFREG); -} -#define nwtn(x) verify(!dsp.regs_init.x) -#define wtn(x) nwtn(x);dsp.regs_init.x=true; +struct DSPJitX86: DSPBackend { + u8* aica_ram; + u32 aram_mask; -//sign extend to 32 bits -void dsp_rec_se(x86_block& x86e,x86_gpr_reg reg,u32 src_sz,u32 dst_sz=0xFF) -{ - if (dst_sz==0xFF) - dst_sz=src_sz; - //24 -> 32 (pad to 32 bits) - x86e.Emit(op_shl32,reg,32-src_sz); - //32 -> 24 (MSB propagation) - x86e.Emit(op_sar32,reg,32-dst_sz); -} -//Reads : MWT_1,MRD_1,MEM_ADDR -//Writes : Wire MEM_RD_DATA_NV -void dsp_rec_DRAM_CI(x86_block& x86e,_INST& prev_op,u32 step,x86_gpr_reg MEM_RD_DATA_NV) -{ - nwtn(MWT_1); - nwtn(MRD_1); - nwtn(MEM_ADDR); - nwtn(MEM_WT_DATA); - 
- //Request : step x (odd step) - //Operation : x+1 (even step) - //Data avail : x+2 (odd step, can request again) - if (!(step&1)) - { - //Get and mask ram address :) - x86e.Emit(op_mov32,EAX,&dsp.regs.MEM_ADDR); - x86e.Emit(op_and32,EAX,AICA_RAM_MASK); - - x86e.Emit(op_add32,EAX,(unat)aica_ram.data); - - //prev. opcode did a mem read request ? - if (prev_op.MRD) - { - //Do the read [MEM_ADDRS] -> MEM_RD_DATA_NV - x86e.Emit(op_movsx16to32,MEM_RD_DATA_NV,x86_mrm(EAX)); - } - //prev. opcode did a mem write request ? - if (prev_op.MWT) - { - //Do the write [MEM_ADDRS] <-MEM_WT_DATA - x86e.Emit(op_mov32,EDX,&dsp.regs.MEM_WT_DATA); - x86e.Emit(op_mov16,x86_mrm(EAX),EDX); - } - } -} -//Reads : ADRS_REG,MADRS,MDEC_CT -//Writes : MEM_ADDR -void dsp_rec_MEM_AGU(x86_block& x86e,_INST& op,u32 step) -{ - nwtn(ADRS_REG); - nwtn(MEM_ADDR); - - //These opcode fields are valid on odd steps (mem req. is only allowed then) - //MEM Request : step x - //Mem operation : step x+1 (address is available at this point) - if (step&1) - { - //Addrs is 16:1 - x86e.Emit(op_mov32,EAX,&DSPData->MADRS[op.MASA]); - - //Added if ADREB - if (op.ADREB) - x86e.Emit(op_add32,EAX,&dsp.regs.ADRS_REG); - - //+1 if NXADR is set - if (op.NXADR) - x86e.Emit(op_add32,EAX,1); - - //RBL warp around is here, according to docs, but that seems to cause _very_ bad results - // if (!op.TABLE) - // x86e.Emit(op_and32,EAX,dsp.RBL); - - //MDEC_CT is added if !TABLE - if (!op.TABLE) - x86e.Emit(op_add32,EAX,&dsp.regs.MDEC_CT); - - //RBL/RBP are constants for the program - //Apply RBL if !TABLE - //Else limit to 16 bit add - //*update* always limit to 16 bit add adter MDEC_CT ? - if (!op.TABLE) - x86e.Emit(op_and32,EAX,dsp.RBL); - else - x86e.Emit(op_and32,EAX,0xFFFF); - - //Calculate the value ! 
- //EAX*2 b/c it points to sample (16:1 of the address) - x86e.Emit(op_lea32,EDX,x86_mrm(EAX,sib_scale_2,x86_ptr::create(dsp.RBP))); - - //Save the result to MEM_ADDR - x86e.Emit(op_mov32,&dsp.regs.MEM_ADDR,EDX); - } - wtn(MEM_ADDR); -} -//Reads : MEMS,MIXS,EXTS -//Writes : INPUTS (Wire) -void dsp_rec_INPUTS(x86_block& x86e,_INST& op,x86_gpr_reg INPUTS) -{ - nwtn(MEMS); - - //nwtn(MIXS); -> these are read only :) - //nwtn(EXTS); - - //INPUTS is 24 bit, we convert everything to that - //Maby we dont need to convert, but just to sign extend ? - if(op.IRA<0x20) - { - x86e.Emit(op_mov32,INPUTS,&dsp.MEMS[op.IRA]); - dsp_rec_se(x86e,INPUTS,24); - } - else if(op.IRA<0x30) - { - x86e.Emit(op_mov32,INPUTS,&dsp.MIXS[op.IRA-0x20]); - dsp_rec_se(x86e,INPUTS,20,24); - } - else if(op.IRA<0x32) - { - x86e.Emit(op_mov32,ESI,&DSPData->EXTS[op.IRA-0x30]); - //x86e.Emit(op_shl32,INPUTS,8); - dsp_rec_se(x86e,INPUTS,16,24); - } - - //Sign extend to 32 bits - //dsp_rec_se(x86e,INPUTS,24); -} -//Reads : MEM_RD_DATA,NO_FLT2 -//Writes : MEMS -void dsp_rec_MEMS_WRITE(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS) -{ - nwtn(MEM_RD_DATA); - nwtn(NOFL_2); - - //MEMS write reads from MEM_RD_DATA register (MEM_RD_DATA -> Converter -> MEMS). - //The converter's nofl flag has 2 steps delay (so that it can be set with the MRQ). - if (op.IWT) - { - x86e.Emit(op_movsx16to32,ECX,&dsp.regs.MEM_RD_DATA); - x86e.Emit(op_mov32,EAX,ECX); - - //Pad and signed extend EAX - //x86e.Emit(op_shl32,EAX,16); - //x86e.Emit(op_sar32,EAX,8); - x86e.Emit(op_shl32,EAX,8); - - if (SUPPORT_NOFL) - { - x86_Label* no_fl=x86e.CreateLabel(false,8);//no float conversions - - //Do we have to convert ? - x86e.Emit(op_cmp32,&dsp.regs.NOFL_2,1); - x86e.Emit(op_je,no_fl); - { - //Convert ! 
- x86e.Emit(op_call,x86_ptr_imm(UNPACK)); - } - x86e.MarkLabel(no_fl); - } - x86e.Emit(op_mov32,&dsp.MEMS[op.IWA],EAX); - } - - wtn(MEMS); -} -//Reads : MEM_RD_DATA_NV (Wire) -//Writes : MEM_RD_DATA -void dsp_rec_MEM_RD_DATA_WRITE(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg MEM_RD_DATA_NV) -{ - //Request : step x (odd step) - //Operation : x+1 (even step) - //Data avail : x+2 (odd step, can request again) - //The MEM_RD_DATA_NV wire exists only on even steps - if (!(step&1)) - { - x86e.Emit(op_mov32,&dsp.regs.MEM_RD_DATA,MEM_RD_DATA_NV); - } - - wtn(MEM_RD_DATA); -} - -x86_mrm_t dsp_reg_GenerateTempsAddrs(x86_block& x86e,u32 TEMPS_NUM,x86_gpr_reg TEMPSaddrsreg) -{ - x86e.Emit(op_mov32,TEMPSaddrsreg,&dsp.regs.MDEC_CT); - x86e.Emit(op_add32,TEMPSaddrsreg,TEMPS_NUM); - x86e.Emit(op_and32,TEMPSaddrsreg,127); - return x86_mrm(ECX,sib_scale_4,dsp.TEMP); -} -//Reads : INPUTS,TEMP,FRC_REG,COEF,Y_REG -//Writes : MAD_OUT_NV (Wire) -void dsp_rec_MAD(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS,x86_gpr_reg MAD_OUT_NV) -{ - bool use_TEMP=op.XSEL==0 || (op.BSEL==0 && op.ZERO==0); - - //TEMPS (if used) on ECX - const x86_gpr_reg TEMPS_reg=ECX; - if (use_TEMP) - { - //read temps - x86e.Emit(op_mov32,TEMPS_reg,dsp_reg_GenerateTempsAddrs(x86e,op.TRA,TEMPS_reg)); - dsp_rec_se(x86e,TEMPS_reg,24); - } - - x86_reg mul_x_input; - //X : 24 bits - if (op.XSEL==1) - { - //X=INPUTS - mul_x_input=INPUTS; - //x86e.Emit(op_mov32,EDX,INPUTS); - } - else - { - //X=TEMPS - mul_x_input=TEMPS_reg; - //x86e.Emit(op_mov32,EDX,TEMPS_reg); - } - - //MUL Y in : EAX - //Y : 13 bits - switch(op.YSEL) - { - case 0: - //Y=FRC_REG[13] - x86e.Emit(op_mov32,EAX,&dsp.regs.FRC_REG); - dsp_rec_se(x86e,EAX,13); - break; - - case 1: - //Y=COEF[13] - x86e.Emit(op_mov32,EAX,&DSPData->COEF[step]); - dsp_rec_se(x86e,EAX,16,13); - break; - - case 2: - //Y=Y_REG[23:11] (Y_REG is 19 bits, INPUTS[23:4], so that is realy 19:7) - x86e.Emit(op_mov32,EAX,&dsp.regs.Y_REG); - dsp_rec_se(x86e,EAX,19,13); - break; - - 
case 3: - //Y=0'Y_REG[15:4] (Y_REG is 19 bits, INPUTS[23:4], so that is realy 11:0) - x86e.Emit(op_mov32,EAX,&dsp.regs.Y_REG); - x86e.Emit(op_and32,0xFFF);//Clear bit 13+ - break; - } - - //Do the mul -- maby it has overflow protection ? - //24+13=37, -11 = 26 - //that can be >>1 or >>2 on the shifter after the mul - x86e.Emit(op_imul32,mul_x_input); - //*NOTE* here, shrd is unsigned, but we have EDX signed, and we may only shift up to 11 bits from it - //so it works just fine :) - x86e.Emit(op_shrd32,EAX,EDX,10); - - //cut the upper bits so that it is 26 bits signed - dsp_rec_se(x86e,EAX,26); - - //Adder, takes MUL_OUT at EAX - //Adds B (EDX) - //Outputs EAX - - if (!op.ZERO) //if zero is set the adder has no effect - { - if (op.BSEL==1) - { - //B=MAD_OUT[??] - //mad out is stored on s32 format, so no need for sign extension - x86e.Emit(op_mov32,EDX,&dsp.regs.MAD_OUT); - } - else - { - //B=TEMP[??] - //TEMPS is already sign extended, so no need for it - //Just converting 24 -> 26 bits using lea - x86e.Emit(op_lea32,EDX,x86_mrm(TEMPS_reg,sib_scale_4,0)); - } - //Gating is applied here normally (ZERO). - //NEGB then inverts the value (NOT) (or 0 , if gated) and the adder adds +1 if NEGB is set. - //However, (~X)+1 = -X , and (~0)+1=0 so i skip the add - if (op.NEGB) - { - x86e.Emit(op_neg32,EDX); - } - - //Add hm, is there overflow protection here ? - //The result of mul is on EAX, we modify that - x86e.Emit(op_add32,EAX,EDX); - } - - //cut the upper bits so that it is 26 bits signed - dsp_rec_se(x86e,EAX,26); - - //Write to MAD_OUT_NV wire :) - x86e.Emit(op_mov32,MAD_OUT_NV,EAX); -} - -//Reads : INPUTS,MAD_OUT -//Writes : EFREG,TEMP,FRC_REG,ADRS_REG,MEM_WT_DATA -void dsp_rec_EFO_FB(x86_block& x86e,_INST& op,u32 step,x86_gpr_reg INPUTS) -{ - nwtn(MAD_OUT); - //MAD_OUT is s32, no sign extension needed - x86e.Emit(op_mov32,EAX,&dsp.regs.MAD_OUT); - //sh .. l ? 
- switch(op.SHIFT) - { - case 0: - x86e.Emit(op_sar32,EAX,2); - //×1 Protected - x86e.Emit(op_mov32,EDX,(u32)-524288);//8388608//32768//524288 - x86e.Emit(op_cmp32,EAX,EDX); - x86e.Emit(op_cmovl32,EAX,EDX); - x86e.Emit(op_neg32,EDX); - x86e.Emit(op_cmp32,EAX,EDX); - x86e.Emit(op_cmovg32,EAX,EDX); - //protect ! - break; - case 1: - //×2 Protected - x86e.Emit(op_sar32,EAX,1); - - x86e.Emit(op_mov32,EDX,(u32)-524288);//8388608//32768//524288 - x86e.Emit(op_cmp32,EAX,EDX); - x86e.Emit(op_cmovl32,EAX,EDX); - x86e.Emit(op_not32,EDX); - x86e.Emit(op_cmp32,EAX,EDX); - x86e.Emit(op_cmovg32,EAX,EDX); - //protect ! - break; - case 2: - //×2 Not protected - x86e.Emit(op_sar32,EAX,1); - dsp_rec_se(x86e,EAX,24); - break; - case 3: - //×1 Not protected - x86e.Emit(op_sar32,EAX,1); - x86e.Emit(op_shl32,EAX,2); - dsp_rec_se(x86e,EAX,24); - break; - } - - //Write EFREG ? - if (op.EWT) - { - x86e.Emit(op_mov32,EDX,EAX); - //top 16 bits ? or lower 16 ? - //i use top 16, following the same rule as the input - x86e.Emit(op_sar32,EDX,4); - - //write :) - x86e.Emit(op_mov16,&DSPData->EFREG[op.EWA],DX); - } - - //Write TEMPS ? - if (op.TWT) - { - //Temps is 24 bit, stored as s32 (no conversion required) - - //write it - x86e.Emit(op_mov32,dsp_reg_GenerateTempsAddrs(x86e,op.TWA,ECX),EAX); - } - - //COMMON TO FRC_REG and ADRS_REG - //interpolation mode : shift1=1=shift0 - //non interpolation : shift1!=1 && shift0!=1 ? ( why && ?) -- i implement it as || - - //Write to FRC_REG ? - if (op.FRCL) - { - if (op.SHIFT==3) - { - //FRC_REG[12:0]=Shift[23:11] - x86e.Emit(op_mov32,ECX,EAX); - x86e.Emit(op_sar32,ECX,11); - } - else - { - //FRC_REG[12:0]=0'Shift[11:0] - x86e.Emit(op_mov32,ECX,EAX); - x86e.Emit(op_and32,ECX,(1<<12)-1);//bit 12 and up are 0'd - } - x86e.Emit(op_mov32,&dsp.regs.FRC_REG,ECX); - } - - //Write to ADDRS_REG ? 
- if (op.ADRL) - { - if (op.SHIFT==3) - { - //ADRS_REG[11:0]=Shift[23,23,23,23,23,22:16] - x86e.Emit(op_mov32,ECX,EAX); - x86e.Emit(op_shl32,ECX,8); //bit31=bit 23 - x86e.Emit(op_sar32,ECX,24); //bit 0 = bit16 (16+8=24) - } - else - { - //ADRS_REG[11:0]=0'Shift[23:12] - x86e.Emit(op_mov32,ECX,EAX); - x86e.Emit(op_sar32,ECX,12); - x86e.Emit(op_and32,ECX,(1<<12)-1);//bit 11 and up are 0'd - } - x86e.Emit(op_mov32,&dsp.regs.ADRS_REG,ECX); - } - - //MEM_WT_DATA write - //This kills off any non protected regs (EAX,EDX,ECX) - { - //pack ? - if (!op.NOFL && SUPPORT_NOFL) - { //yes - x86e.Emit(op_mov32,ECX,EAX); - x86e.Emit(op_call,x86_ptr_imm(PACK)); - } - else - { //shift (look @ EFREG write for more info) - x86e.Emit(op_sar32,EAX,8); - } - //data in on EAX - x86e.Emit(op_mov32,&dsp.regs.MEM_WT_DATA,EAX); - } - - //more stuff here - wtn(EFREG); - wtn(TEMPS); - wtn(FRC_REG); - wtn(ADRS_REG); - wtn(MEM_WT_DATA); -} -void dsp_recompile() -{ - dsp.dyndirty=false; - - x86_block x86e; - x86e.Init(dyna_realloc,dyna_realloc); - - x86e.Emit(op_push32,EBX); - x86e.Emit(op_push32,EBP); - x86e.Emit(op_push32,ESI); - x86e.Emit(op_push32,EDI); - - //OK. - //Input comes from mems, mixs and exts, as well as possible memory reads and writes - //mems is read/write (memory loads go there), mixs and exts are read only. - //There are various delays (registers) so i need to properly emulate (more on that later) - - //Registers that can be written : MIXS,FRC_REG,ADRS_REG,EFREG,TEMP - - //MRD, MWT, NOFL, TABLE, NXADR, ADREB, and MASA[4:0] - //Only allowed on odd steps, when counting from 1 (2,4,6, ...).That is even steps when counting from 0 (1,3,5, ...) 
- for(int step=0;step<128;++step) - { - u32* mpro=DSPData->MPRO+step*4; - u32 prev_step=(step-1)&127; - u32* prev_mpro=DSPData->MPRO+prev_step*4; - //if its a nop just go to the next opcode - //No, don't really do that, we need to propage opcode bits :p - //if (mpro[0]==0 && mpro[1]==0 && mpro[2]== 0 && mpro[3]==0) - // continue; - - _INST op; - _INST prev_op; - DecodeInst(mpro,&op); - DecodeInst(prev_mpro,&prev_op); - - //printf("[%d] " - // "TRA %d,TWT %d,TWA %d,XSEL %d,YSEL %d,IRA %d,IWT %d,IWA %d,TABLE %d,MWT %d,MRD %d,EWT %d,EWA %d,ADRL %d,FRCL %d,SHIFT %d,YRL %d,NEGB %d,ZERO %d,BSEL %d,NOFL %d,MASA %d,ADREB %d,NXADR %d\n" - // ,step - // ,op.TRA,op.TWT,op.TWA,op.XSEL,op.YSEL,op.IRA,op.IWT,op.IWA,op.TABLE,op.MWT,op.MRD,op.EWT,op.EWA,op.ADRL,op.FRCL,op.SHIFT,op.YRL,op.NEGB,op.ZERO,op.BSEL,op.NOFL,op.MASA,op.ADREB,op.NXADR); - - //Dynarec ! - _dsp_debug_step_start(); - //DSP regs are on memory - //Wires stay on x86 regs, written to memory as fast as possible - - //EDI=MEM_RD_DATA_NV - dsp_rec_DRAM_CI(x86e,prev_op,step,EDI); - - //;) - //Address Generation Unit ! nothing spectacular really ... - dsp_rec_MEM_AGU(x86e,op,step); - - //Calculate INPUTS wire - //ESI : INPUTS - dsp_rec_INPUTS(x86e,op,ESI); - - //:o ? 
- //Write the MEMS register - dsp_rec_MEMS_WRITE(x86e,op,step,ESI); - - //Write the MEM_RD_DATA regiter - //Last use of MEM_RD_DATA_NV(EDI) - dsp_rec_MEM_RD_DATA_WRITE(x86e,op,step,EDI); - //EDI is now free :D - - //EDI is used for MAD_OUT_NV - //Mul-add - dsp_rec_MAD(x86e,op,step,ESI,EDI); - - //Effect output/ Feedback - dsp_rec_EFO_FB(x86e,op,step,ESI); - - //Write MAD_OUT_NV - { - x86e.Emit(op_mov32,&dsp.regs.MAD_OUT,EDI); - wtn(MAD_OUT); - } - //These are implemented here :p - - //Inputs -> Y reg - //Last use of inputs (ESI) and its destructive at that ;p - { - if (op.YRL) - { - x86e.Emit(op_sar32,ESI,4);//[23:4] - x86e.Emit(op_mov32,&dsp.regs.Y_REG,ESI); - - } - wtn(Y_REG); - } - - //NOFL delay propagation :) - { - //NOFL_2=NOFL_1 - x86e.Emit(op_mov32,EAX,&dsp.regs.NOFL_1); - x86e.Emit(op_mov32,&dsp.regs.NOFL_2,EAX); - //NOFL_1 = NOFL - x86e.Emit(op_mov32,&dsp.regs.NOFL_1,op.NOFL); - - wtn(NOFL_2); - wtn(NOFL_1); - } - - //MWT_1/MRD_1 propagation - { - //MWT_1=MWT - x86e.Emit(op_mov32,&dsp.regs.MWT_1,op.MWT); - //MRD_1=MRD - x86e.Emit(op_mov32,&dsp.regs.MRD_1,op.MRD); - - wtn(MWT_1); - wtn(MRD_1); - } - - _dsp_debug_step_end(); - } - - //Need to decrement MDEC_CT here :) - x86e.Emit(op_pop32,EDI); - x86e.Emit(op_pop32,ESI); - x86e.Emit(op_pop32,EBP); - x86e.Emit(op_pop32,EBX); - x86e.Emit(op_ret); - x86e.Generate(); -} + DSPJitX86(u8* aica_ram, u32 aram_size) : aica_ram(aica_ram), aram_mask(aram_size-1) { +#if HOST_OS == OS_WINDOWS + DWORD old; + VirtualProtect(dsp.DynCode, sizeof(dsp.DynCode), PAGE_EXECUTE_READWRITE, &old); +#endif + } + const bool SUPPORT_NOFL = false; -void dsp_print_mame(); -void dsp_step_mame(); -void dsp_emu_grandia(); -void dsp_step() -{ - //clear output reg - memset(DSPData->EFREG,0,sizeof(DSPData->EFREG)); +#define assert verify - if (dsp.dyndirty) - { - dsp.dyndirty=false; - //dsp_print_mame(); - dsp_recompile(); - } - //dsp_step_mame(); - //dsp_emu_grandia(); - - //run the code :p - ((void (*)())&dsp.DynCode)(); - - 
dsp.regs.MDEC_CT--; - if (dsp.regs.MDEC_CT==0) - dsp.regs.MDEC_CT=dsp.RBL; - //here ? or before ? - //memset(DSP->MIXS,0,4*16); -} +#pragma warning(disable:4311) -void dsp_writenmem(u32 addr) -{ - addr-=0x3000; - //COEF : native - //MEMS : native - //MPRO : native - if (addr>=0x400 && addr<0xC00) - { - dsp.dyndirty=true; - } - - /* - //buffered DSP state - //24 bit wide - u32 TEMP[128]; - //24 bit wide - u32 MEMS[32]; - //20 bit wide - s32 MIXS[16]; - */ -} + /* + //#define USEFLOATPACK + //pack s24 to s1e4s11 + naked u16 packasm(s32 val) + { + __asm + { + mov edx,ecx; //eax will be sign + and edx,0x80000; //get the sign + + jz poz; + neg ecx; + + poz: + bsr eax,ecx; + jz _zero; + + //24 -> 11 + //13 -> 0 + //12..0 -> 0 + sub eax,11; + cmovs eax,0; //if <0 -> 0 + + shr ecx,eax; //shift out mantissa as needed (yeah i know, no rounding here and all .. ) + + shr eax,12; //[14:12] is exp + or edx,ecx; //merge [15] | [11:0] + or eax,edx; //merge [14:12] | ([15] | [11:0]), result on eax + ret; + + _zero: + xor eax,eax; + ret; + } + } + //ONLY lower 16 bits are valid, rest are ignored but do movzx to avoid partial stalls :) + naked s32 unpackasm(u32 val) + { + __asm + { + mov eax,ecx; //get mantissa bits + and ecx,0x7FF; // + + shl eax,11; //get shift factor (shift) + mov edx,eax; //keep a copy for the sign + and eax,0xF; //get shift factor (mask) + + shl ecx,eax; //shift mantissa to normal position + + test edx,0x10; //signed ? 
+ jnz _negme; + + ret; //nop, return as is + + _negme: + //yep, negate and return + neg eax; + ret; + + } + }*/ + + static void* dyna_realloc(void* ptr, u32 oldsize, u32 newsize) + { + return dsp.DynCode; + } + + void _dsp_debug_step_start() + { + memset(&dsp.regs_init, 0, sizeof(dsp.regs_init)); + } + + void _dsp_debug_step_end() + { + verify(dsp.regs_init.MAD_OUT); + verify(dsp.regs_init.MEM_ADDR); + verify(dsp.regs_init.MEM_RD_DATA); + verify(dsp.regs_init.MEM_WT_DATA); + verify(dsp.regs_init.FRC_REG); + verify(dsp.regs_init.ADRS_REG); + verify(dsp.regs_init.Y_REG); + + //verify(dsp.regs_init.MDEC_CT); // -> its done on C + verify(dsp.regs_init.MWT_1); + verify(dsp.regs_init.MRD_1); + // verify(dsp.regs_init.MADRS); //THAT WAS not real, MEM_ADDR is the deal ;p + verify(dsp.regs_init.MEMS); + verify(dsp.regs_init.NOFL_1); + verify(dsp.regs_init.NOFL_2); + verify(dsp.regs_init.TEMPS); + verify(dsp.regs_init.EFREG); + } -void dsp_readmem(u32 addr) -{ - //nothing ? :p -} +#define nwtn(x) verify(!dsp.regs_init.x) +#define wtn(x) nwtn(x);dsp.regs_init.x=true; -void dsp_term() -{ + //sign extend to 32 bits + void dsp_rec_se(x86_block& x86e, x86_gpr_reg reg, u32 src_sz, u32 dst_sz = 0xFF) + { + if (dst_sz == 0xFF) + dst_sz = src_sz; + //24 -> 32 (pad to 32 bits) + x86e.Emit(op_shl32, reg, 32 - src_sz); + //32 -> 24 (MSB propagation) + x86e.Emit(op_sar32, reg, 32 - dst_sz); + } + + //Reads : MWT_1,MRD_1,MEM_ADDR + //Writes : Wire MEM_RD_DATA_NV + void dsp_rec_DRAM_CI(x86_block& x86e, _INST& prev_op, u32 step, x86_gpr_reg MEM_RD_DATA_NV) + { + nwtn(MWT_1); + nwtn(MRD_1); + nwtn(MEM_ADDR); + nwtn(MEM_WT_DATA); + + //Request : step x (odd step) + //Operation : x+1 (even step) + //Data avail : x+2 (odd step, can request again) + if (!(step & 1)) + { + //Get and mask ram address :) + x86e.Emit(op_mov32, EAX, &dsp.regs.MEM_ADDR); + x86e.Emit(op_and32, EAX, aram_mask); + + x86e.Emit(op_add32, EAX, (unat)aica_ram); + + //prev. opcode did a mem read request ? 
+ if (prev_op.MRD) + { + //Do the read [MEM_ADDRS] -> MEM_RD_DATA_NV + x86e.Emit(op_movsx16to32, MEM_RD_DATA_NV, x86_mrm(EAX)); + } + //prev. opcode did a mem write request ? + if (prev_op.MWT) + { + //Do the write [MEM_ADDRS] <-MEM_WT_DATA + x86e.Emit(op_mov32, EDX, &dsp.regs.MEM_WT_DATA); + x86e.Emit(op_mov16, x86_mrm(EAX), EDX); + } + } + } + + //Reads : ADRS_REG,MADRS,MDEC_CT + //Writes : MEM_ADDR + void dsp_rec_MEM_AGU(x86_block& x86e, _INST& op, u32 step) + { + nwtn(ADRS_REG); + nwtn(MEM_ADDR); + + //These opcode fields are valid on odd steps (mem req. is only allowed then) + //MEM Request : step x + //Mem operation : step x+1 (address is available at this point) + if (step & 1) + { + //Addrs is 16:1 + x86e.Emit(op_mov32, EAX, &DSPData->MADRS[op.MASA]); + + //Added if ADREB + if (op.ADREB) + x86e.Emit(op_add32, EAX, &dsp.regs.ADRS_REG); + + //+1 if NXADR is set + if (op.NXADR) + x86e.Emit(op_add32, EAX, 1); + + //RBL warp around is here, according to docs, but that seems to cause _very_ bad results + // if (!op.TABLE) + // x86e.Emit(op_and32,EAX,dsp.RBL); + + //MDEC_CT is added if !TABLE + if (!op.TABLE) + x86e.Emit(op_add32, EAX, &dsp.regs.MDEC_CT); + + //RBL/RBP are constants for the program + //Apply RBL if !TABLE + //Else limit to 16 bit add + //*update* always limit to 16 bit add adter MDEC_CT ? + if (!op.TABLE) + x86e.Emit(op_and32, EAX, dsp.RBL); + else + x86e.Emit(op_and32, EAX, 0xFFFF); + + //Calculate the value ! 
+ //EAX*2 b/c it points to sample (16:1 of the address) + x86e.Emit(op_lea32, EDX, x86_mrm(EAX, sib_scale_2, x86_ptr::create(dsp.RBP))); + + //Save the result to MEM_ADDR + x86e.Emit(op_mov32, &dsp.regs.MEM_ADDR, EDX); + } + wtn(MEM_ADDR); + } + + //Reads : MEMS,MIXS,EXTS + //Writes : INPUTS (Wire) + void dsp_rec_INPUTS(x86_block& x86e, _INST& op, x86_gpr_reg INPUTS) + { + nwtn(MEMS); + + //nwtn(MIXS); -> these are read only :) + //nwtn(EXTS); + + //INPUTS is 24 bit, we convert everything to that + //Maby we dont need to convert, but just to sign extend ? + if (op.IRA < 0x20) + { + x86e.Emit(op_mov32, INPUTS, &dsp.MEMS[op.IRA]); + dsp_rec_se(x86e, INPUTS, 24); + } + else if (op.IRA < 0x30) + { + x86e.Emit(op_mov32, INPUTS, &dsp.MIXS[op.IRA - 0x20]); + dsp_rec_se(x86e, INPUTS, 20, 24); + } + else if (op.IRA < 0x32) + { + x86e.Emit(op_mov32, ESI, &DSPData->EXTS[op.IRA - 0x30]); + //x86e.Emit(op_shl32,INPUTS,8); + dsp_rec_se(x86e, INPUTS, 16, 24); + } + + //Sign extend to 32 bits + //dsp_rec_se(x86e,INPUTS,24); + } + + //Reads : MEM_RD_DATA,NO_FLT2 + //Writes : MEMS + void dsp_rec_MEMS_WRITE(x86_block& x86e, _INST& op, u32 step, x86_gpr_reg INPUTS) + { + nwtn(MEM_RD_DATA); + nwtn(NOFL_2); + + //MEMS write reads from MEM_RD_DATA register (MEM_RD_DATA -> Converter -> MEMS). + //The converter's nofl flag has 2 steps delay (so that it can be set with the MRQ). + if (op.IWT) + { + x86e.Emit(op_movsx16to32, ECX, &dsp.regs.MEM_RD_DATA); + x86e.Emit(op_mov32, EAX, ECX); + + //Pad and signed extend EAX + //x86e.Emit(op_shl32,EAX,16); + //x86e.Emit(op_sar32,EAX,8); + x86e.Emit(op_shl32, EAX, 8); + + if (SUPPORT_NOFL) + { + x86_Label* no_fl = x86e.CreateLabel(false, 8);//no float conversions + + //Do we have to convert ? + x86e.Emit(op_cmp32, &dsp.regs.NOFL_2, 1); + x86e.Emit(op_je, no_fl); + { + //Convert ! 
+ x86e.Emit(op_call, x86_ptr_imm(UNPACK)); + } + x86e.MarkLabel(no_fl); + } + x86e.Emit(op_mov32, &dsp.MEMS[op.IWA], EAX); + } + + wtn(MEMS); + } + + //Reads : MEM_RD_DATA_NV (Wire) + //Writes : MEM_RD_DATA + void dsp_rec_MEM_RD_DATA_WRITE(x86_block& x86e, _INST& op, u32 step, x86_gpr_reg MEM_RD_DATA_NV) + { + //Request : step x (odd step) + //Operation : x+1 (even step) + //Data avail : x+2 (odd step, can request again) + //The MEM_RD_DATA_NV wire exists only on even steps + if (!(step & 1)) + { + x86e.Emit(op_mov32, &dsp.regs.MEM_RD_DATA, MEM_RD_DATA_NV); + } + + wtn(MEM_RD_DATA); + } + + x86_mrm_t dsp_reg_GenerateTempsAddrs(x86_block& x86e, u32 TEMPS_NUM, x86_gpr_reg TEMPSaddrsreg) + { + x86e.Emit(op_mov32, TEMPSaddrsreg, &dsp.regs.MDEC_CT); + x86e.Emit(op_add32, TEMPSaddrsreg, TEMPS_NUM); + x86e.Emit(op_and32, TEMPSaddrsreg, 127); + return x86_mrm(ECX, sib_scale_4, dsp.TEMP); + } + + //Reads : INPUTS,TEMP,FRC_REG,COEF,Y_REG + //Writes : MAD_OUT_NV (Wire) + void dsp_rec_MAD(x86_block& x86e, _INST& op, u32 step, x86_gpr_reg INPUTS, x86_gpr_reg MAD_OUT_NV) + { + bool use_TEMP = op.XSEL == 0 || (op.BSEL == 0 && op.ZERO == 0); + + //TEMPS (if used) on ECX + const x86_gpr_reg TEMPS_reg = ECX; + if (use_TEMP) + { + //read temps + x86e.Emit(op_mov32, TEMPS_reg, dsp_reg_GenerateTempsAddrs(x86e, op.TRA, TEMPS_reg)); + dsp_rec_se(x86e, TEMPS_reg, 24); + } + + x86_reg mul_x_input; + //X : 24 bits + if (op.XSEL == 1) + { + //X=INPUTS + mul_x_input = INPUTS; + //x86e.Emit(op_mov32,EDX,INPUTS); + } + else + { + //X=TEMPS + mul_x_input = TEMPS_reg; + //x86e.Emit(op_mov32,EDX,TEMPS_reg); + } + + //MUL Y in : EAX + //Y : 13 bits + switch (op.YSEL) + { + case 0: + //Y=FRC_REG[13] + x86e.Emit(op_mov32, EAX, &dsp.regs.FRC_REG); + dsp_rec_se(x86e, EAX, 13); + break; + + case 1: + //Y=COEF[13] + x86e.Emit(op_mov32, EAX, &DSPData->COEF[step]); + dsp_rec_se(x86e, EAX, 16, 13); + break; + + case 2: + //Y=Y_REG[23:11] (Y_REG is 19 bits, INPUTS[23:4], so that is realy 19:7) + 
x86e.Emit(op_mov32, EAX, &dsp.regs.Y_REG); + dsp_rec_se(x86e, EAX, 19, 13); + break; + + case 3: + //Y=0'Y_REG[15:4] (Y_REG is 19 bits, INPUTS[23:4], so that is realy 11:0) + x86e.Emit(op_mov32, EAX, &dsp.regs.Y_REG); + x86e.Emit(op_and32, 0xFFF);//Clear bit 13+ + break; + } + + //Do the mul -- maby it has overflow protection ? + //24+13=37, -11 = 26 + //that can be >>1 or >>2 on the shifter after the mul + x86e.Emit(op_imul32, mul_x_input); + //*NOTE* here, shrd is unsigned, but we have EDX signed, and we may only shift up to 11 bits from it + //so it works just fine :) + x86e.Emit(op_shrd32, EAX, EDX, 10); + + //cut the upper bits so that it is 26 bits signed + dsp_rec_se(x86e, EAX, 26); + + //Adder, takes MUL_OUT at EAX + //Adds B (EDX) + //Outputs EAX + + if (!op.ZERO) //if zero is set the adder has no effect + { + if (op.BSEL == 1) + { + //B=MAD_OUT[??] + //mad out is stored on s32 format, so no need for sign extension + x86e.Emit(op_mov32, EDX, &dsp.regs.MAD_OUT); + } + else + { + //B=TEMP[??] + //TEMPS is already sign extended, so no need for it + //Just converting 24 -> 26 bits using lea + x86e.Emit(op_lea32, EDX, x86_mrm(TEMPS_reg, sib_scale_4, 0)); + } + //Gating is applied here normally (ZERO). + //NEGB then inverts the value (NOT) (or 0 , if gated) and the adder adds +1 if NEGB is set. + //However, (~X)+1 = -X , and (~0)+1=0 so i skip the add + if (op.NEGB) + { + x86e.Emit(op_neg32, EDX); + } + + //Add hm, is there overflow protection here ? 
+ //The result of mul is on EAX, we modify that + x86e.Emit(op_add32, EAX, EDX); + } + + //cut the upper bits so that it is 26 bits signed + dsp_rec_se(x86e, EAX, 26); + + //Write to MAD_OUT_NV wire :) + x86e.Emit(op_mov32, MAD_OUT_NV, EAX); + } + + //Reads : INPUTS,MAD_OUT + //Writes : EFREG,TEMP,FRC_REG,ADRS_REG,MEM_WT_DATA + void dsp_rec_EFO_FB(x86_block& x86e, _INST& op, u32 step, x86_gpr_reg INPUTS) + { + nwtn(MAD_OUT); + //MAD_OUT is s32, no sign extension needed + x86e.Emit(op_mov32, EAX, &dsp.regs.MAD_OUT); + //sh .. l ? + switch (op.SHIFT) + { + case 0: + x86e.Emit(op_sar32, EAX, 2); + //×1 Protected + x86e.Emit(op_mov32, EDX, (u32)-524288);//8388608//32768//524288 + x86e.Emit(op_cmp32, EAX, EDX); + x86e.Emit(op_cmovl32, EAX, EDX); + x86e.Emit(op_neg32, EDX); + x86e.Emit(op_cmp32, EAX, EDX); + x86e.Emit(op_cmovg32, EAX, EDX); + //protect ! + break; + case 1: + //×2 Protected + x86e.Emit(op_sar32, EAX, 1); + + x86e.Emit(op_mov32, EDX, (u32)-524288);//8388608//32768//524288 + x86e.Emit(op_cmp32, EAX, EDX); + x86e.Emit(op_cmovl32, EAX, EDX); + x86e.Emit(op_not32, EDX); + x86e.Emit(op_cmp32, EAX, EDX); + x86e.Emit(op_cmovg32, EAX, EDX); + //protect ! + break; + case 2: + //×2 Not protected + x86e.Emit(op_sar32, EAX, 1); + dsp_rec_se(x86e, EAX, 24); + break; + case 3: + //×1 Not protected + x86e.Emit(op_sar32, EAX, 1); + x86e.Emit(op_shl32, EAX, 2); + dsp_rec_se(x86e, EAX, 24); + break; + } + + //Write EFREG ? + if (op.EWT) + { + x86e.Emit(op_mov32, EDX, EAX); + //top 16 bits ? or lower 16 ? + //i use top 16, following the same rule as the input + x86e.Emit(op_sar32, EDX, 4); + + //write :) + x86e.Emit(op_mov16, &DSPData->EFREG[op.EWA], DX); + } + + //Write TEMPS ? + if (op.TWT) + { + //Temps is 24 bit, stored as s32 (no conversion required) + + //write it + x86e.Emit(op_mov32, dsp_reg_GenerateTempsAddrs(x86e, op.TWA, ECX), EAX); + } + + //COMMON TO FRC_REG and ADRS_REG + //interpolation mode : shift1=1=shift0 + //non interpolation : shift1!=1 && shift0!=1 ? 
( why && ?) -- i implement it as || + + //Write to FRC_REG ? + if (op.FRCL) + { + if (op.SHIFT == 3) + { + //FRC_REG[12:0]=Shift[23:11] + x86e.Emit(op_mov32, ECX, EAX); + x86e.Emit(op_sar32, ECX, 11); + } + else + { + //FRC_REG[12:0]=0'Shift[11:0] + x86e.Emit(op_mov32, ECX, EAX); + x86e.Emit(op_and32, ECX, (1 << 12) - 1);//bit 12 and up are 0'd + } + x86e.Emit(op_mov32, &dsp.regs.FRC_REG, ECX); + } + + //Write to ADDRS_REG ? + if (op.ADRL) + { + if (op.SHIFT == 3) + { + //ADRS_REG[11:0]=Shift[23,23,23,23,23,22:16] + x86e.Emit(op_mov32, ECX, EAX); + x86e.Emit(op_shl32, ECX, 8); //bit31=bit 23 + x86e.Emit(op_sar32, ECX, 24); //bit 0 = bit16 (16+8=24) + } + else + { + //ADRS_REG[11:0]=0'Shift[23:12] + x86e.Emit(op_mov32, ECX, EAX); + x86e.Emit(op_sar32, ECX, 12); + x86e.Emit(op_and32, ECX, (1 << 12) - 1);//bit 11 and up are 0'd + } + x86e.Emit(op_mov32, &dsp.regs.ADRS_REG, ECX); + } + + //MEM_WT_DATA write + //This kills off any non protected regs (EAX,EDX,ECX) + { + //pack ? + if (!op.NOFL && SUPPORT_NOFL) + { //yes + x86e.Emit(op_mov32, ECX, EAX); + x86e.Emit(op_call, x86_ptr_imm(PACK)); + } + else + { //shift (look @ EFREG write for more info) + x86e.Emit(op_sar32, EAX, 8); + } + //data in on EAX + x86e.Emit(op_mov32, &dsp.regs.MEM_WT_DATA, EAX); + } + + //more stuff here + wtn(EFREG); + wtn(TEMPS); + wtn(FRC_REG); + wtn(ADRS_REG); + wtn(MEM_WT_DATA); + } + + void Recompile() + { + x86_block x86e; + x86e.Init(dyna_realloc, dyna_realloc); + + x86e.Emit(op_push32, EBX); + x86e.Emit(op_push32, EBP); + x86e.Emit(op_push32, ESI); + x86e.Emit(op_push32, EDI); + + //OK. + //Input comes from mems, mixs and exts, as well as possible memory reads and writes + //mems is read/write (memory loads go there), mixs and exts are read only. 
+ //There are various delays (registers) so i need to properly emulate (more on that later) + + //Registers that can be written : MIXS,FRC_REG,ADRS_REG,EFREG,TEMP + + //MRD, MWT, NOFL, TABLE, NXADR, ADREB, and MASA[4:0] + //Only allowed on odd steps, when counting from 1 (2,4,6, ...).That is even steps when counting from 0 (1,3,5, ...) + for (int step = 0; step < 128; ++step) + { + u32* mpro = DSPData->MPRO + step * 4; + u32 prev_step = (step - 1) & 127; + u32* prev_mpro = DSPData->MPRO + prev_step * 4; + //if its a nop just go to the next opcode + //No, don't really do that, we need to propage opcode bits :p + //if (mpro[0]==0 && mpro[1]==0 && mpro[2]== 0 && mpro[3]==0) + // continue; + + _INST op; + _INST prev_op; + DecodeInst(mpro, &op); + DecodeInst(prev_mpro, &prev_op); + + //printf("[%d] " + // "TRA %d,TWT %d,TWA %d,XSEL %d,YSEL %d,IRA %d,IWT %d,IWA %d,TABLE %d,MWT %d,MRD %d,EWT %d,EWA %d,ADRL %d,FRCL %d,SHIFT %d,YRL %d,NEGB %d,ZERO %d,BSEL %d,NOFL %d,MASA %d,ADREB %d,NXADR %d\n" + // ,step + // ,op.TRA,op.TWT,op.TWA,op.XSEL,op.YSEL,op.IRA,op.IWT,op.IWA,op.TABLE,op.MWT,op.MRD,op.EWT,op.EWA,op.ADRL,op.FRCL,op.SHIFT,op.YRL,op.NEGB,op.ZERO,op.BSEL,op.NOFL,op.MASA,op.ADREB,op.NXADR); + + //Dynarec ! + _dsp_debug_step_start(); + //DSP regs are on memory + //Wires stay on x86 regs, written to memory as fast as possible + + //EDI=MEM_RD_DATA_NV + dsp_rec_DRAM_CI(x86e, prev_op, step, EDI); + + //;) + //Address Generation Unit ! nothing spectacular really ... + dsp_rec_MEM_AGU(x86e, op, step); + + //Calculate INPUTS wire + //ESI : INPUTS + dsp_rec_INPUTS(x86e, op, ESI); + + //:o ? 
+ //Write the MEMS register + dsp_rec_MEMS_WRITE(x86e, op, step, ESI); + + //Write the MEM_RD_DATA regiter + //Last use of MEM_RD_DATA_NV(EDI) + dsp_rec_MEM_RD_DATA_WRITE(x86e, op, step, EDI); + //EDI is now free :D + + //EDI is used for MAD_OUT_NV + //Mul-add + dsp_rec_MAD(x86e, op, step, ESI, EDI); + + //Effect output/ Feedback + dsp_rec_EFO_FB(x86e, op, step, ESI); + + //Write MAD_OUT_NV + { + x86e.Emit(op_mov32, &dsp.regs.MAD_OUT, EDI); + wtn(MAD_OUT); + } + //These are implemented here :p + + //Inputs -> Y reg + //Last use of inputs (ESI) and its destructive at that ;p + { + if (op.YRL) + { + x86e.Emit(op_sar32, ESI, 4);//[23:4] + x86e.Emit(op_mov32, &dsp.regs.Y_REG, ESI); + + } + wtn(Y_REG); + } + + //NOFL delay propagation :) + { + //NOFL_2=NOFL_1 + x86e.Emit(op_mov32, EAX, &dsp.regs.NOFL_1); + x86e.Emit(op_mov32, &dsp.regs.NOFL_2, EAX); + //NOFL_1 = NOFL + x86e.Emit(op_mov32, &dsp.regs.NOFL_1, op.NOFL); + + wtn(NOFL_2); + wtn(NOFL_1); + } + + //MWT_1/MRD_1 propagation + { + //MWT_1=MWT + x86e.Emit(op_mov32, &dsp.regs.MWT_1, op.MWT); + //MRD_1=MRD + x86e.Emit(op_mov32, &dsp.regs.MRD_1, op.MRD); + + wtn(MWT_1); + wtn(MRD_1); + } + + _dsp_debug_step_end(); + } + + //Need to decrement MDEC_CT here :) + x86e.Emit(op_pop32, EDI); + x86e.Emit(op_pop32, ESI); + x86e.Emit(op_pop32, EBP); + x86e.Emit(op_pop32, EBX); + x86e.Emit(op_ret); + x86e.Generate(); + } + + + + void dsp_print_mame(); + void dsp_step_mame(); + void dsp_emu_grandia(); + + void Step() + { + //clear output reg + memset(DSPData->EFREG, 0, sizeof(DSPData->EFREG)); + + //dsp_step_mame(); + //dsp_emu_grandia(); + + //run the code :p + ((void (*)()) &dsp.DynCode[0])(); + + dsp.regs.MDEC_CT--; + if (dsp.regs.MDEC_CT == 0) + dsp.regs.MDEC_CT = dsp.RBL; + //here ? or before ? 
+ //memset(DSP->MIXS,0,4*16); + } +}; + +DSPBackend* DSPBackend::CreateJIT(u8* aica_ram, u32 aram_size) { + return new DSPJitX86(aica_ram, aram_size); } #endif diff --git a/libswirl/hw/aica/sgc_if.cpp b/libswirl/hw/aica/sgc_if.cpp index c3595909f0..6582aaea86 100644 --- a/libswirl/hw/aica/sgc_if.cpp +++ b/libswirl/hw/aica/sgc_if.cpp @@ -1,5 +1,6 @@ #include "sgc_if.h" #include "dsp.h" +#include "dsp_backend.h" #include "aica_mem.h" #include #include @@ -269,6 +270,7 @@ struct ChannelEx static ChannelEx Chans[64]; ChannelCommonData* ccd; + u8* aica_ram; u8* SA; u32 CA; @@ -358,11 +360,15 @@ struct ChannelEx bool enabled; //set to false to 'freeze' the channel int ChanelNumber; + + void Setup(int cn, u8* ccd_raw, u8* aica_ram) { + ccd = (ChannelCommonData*)&ccd_raw[cn * 0x80]; + ChanelNumber = cn; + this->aica_ram = aica_ram; + } - void Init(int cn,u8* ccd_raw) + void Init() { - ccd=(ChannelCommonData*)&ccd_raw[cn*0x80]; - ChanelNumber=cn; for (u32 i=0;i<0x80;i++) RegWrite(i); disable(); @@ -529,7 +535,7 @@ struct ChannelEx if (ccd->PCMS==0) addr&=~1; //0: 16 bit - SA=&aica_ram.data[addr]; + SA=&aica_ram[addr]; } //LSA,LEA void UpdateLoop() @@ -1086,8 +1092,13 @@ u32 CalcAegSteps(float t) double steps=aeg_allsteps/scnt; return (u32)(steps+0.5); } -void sgc_Init() +static u8* aica_reg; +static u32 aram_mask; +void sgc_Init(u8* aica_reg, u8* aica_ram, u32 aram_size) { + ::aica_reg = aica_reg; + ::aram_mask = aram_size - 1; + staticinitialise(); for (int i=0;i<16;i++) @@ -1111,16 +1122,18 @@ void sgc_Init() AEG_ATT_SPS[i]=CalcAegSteps(AEG_Attack_Time[i]); AEG_DSR_SPS[i]=CalcAegSteps(AEG_DSR_Time[i]); } - for (int i=0;i<64;i++) - Chans[i].Init(i,aica_reg); - dsp_out_vol=(DSP_OUT_VOL_REG*)&aica_reg[0x2000]; + for (int i = 0; i < 64; i++) + Chans[i].Setup(i, aica_reg, aica_ram); + + for (int i = 0; i < 64; i++) + Chans[i].Init(); - dsp_init(); + dsp_out_vol=(DSP_OUT_VOL_REG*)&aica_reg[0x2000]; } void sgc_Term() { - dsp_term(); + } void WriteChannelReg8(u32 channel,u32 reg) 
@@ -1169,7 +1182,7 @@ void WriteCommonReg8(u32 reg,u32 data) if (reg==0x2804 || reg==0x2805) { dsp.RBL=(8192<RBL)-1; - dsp.RBP=( CommonData->RBP*2048&AICA_RAM_MASK); + dsp.RBP=( (CommonData->RBP*2048) & aram_mask); dsp.dyndirty=true; } } @@ -1345,7 +1358,7 @@ void AICA_Sample() } //if (settings.aica.DSPEnabled) { - dsp_step(); + libDSP_Step(); for (int i=0;i<16;i++) { @@ -1406,7 +1419,7 @@ bool channel_serialize(void **data, unsigned int *total_size) for ( i = 0 ; i < 64 ; i++) { - addr = Chans[i].SA - (&(aica_ram.data[0])) ; + addr = Chans[i].SA - Chans[i].aica_ram ; REICAST_S(addr); REICAST_S(Chans[i].CA) ; @@ -1461,7 +1474,7 @@ bool channel_unserialize(void **data, unsigned int *total_size) for ( i = 0 ; i < 64 ; i++) { REICAST_US(addr); - Chans[i].SA = addr + (&(aica_ram.data[0])) ; + Chans[i].SA = addr + Chans[i].aica_ram; REICAST_US(Chans[i].CA) ; REICAST_US(Chans[i].step) ; diff --git a/libswirl/hw/aica/sgc_if.h b/libswirl/hw/aica/sgc_if.h index 151987d6cb..5af48cc891 100644 --- a/libswirl/hw/aica/sgc_if.h +++ b/libswirl/hw/aica/sgc_if.h @@ -7,7 +7,7 @@ void AICA_Sample32(); //u32 ReadChannelReg(u32 channel,u32 reg); void WriteChannelReg8(u32 channel,u32 reg); -void sgc_Init(); +void sgc_Init(u8* aica_reg, u8* aica_ram, u32 aram_size); void sgc_Term(); union fp_22_10 diff --git a/libswirl/hw/arm7/SoundCPU.cpp b/libswirl/hw/arm7/SoundCPU.cpp index cf7c54715c..b3d1f60729 100644 --- a/libswirl/hw/arm7/SoundCPU.cpp +++ b/libswirl/hw/arm7/SoundCPU.cpp @@ -1,18 +1,151 @@ // nullAICA.cpp : Defines the entry point for the DLL application. 
// -#include "types.h" +#include "SoundCPU.h" #include "arm7.h" +#include "arm7_context.h" #include "arm_mem.h" +#include "hw/aica/aica_mmio.h" + +#include + +#define REG_L (0x2D00) +#define REG_M (0x2D04) + +#define N_FLAG regs[RN_PSR_FLAGS].FLG.N +#define Z_FLAG regs[RN_PSR_FLAGS].FLG.Z +#define C_FLAG regs[RN_PSR_FLAGS].FLG.C +#define V_FLAG regs[RN_PSR_FLAGS].FLG.V + + +/* + --Seems like aica has 3 interrupt controllers actualy (damn lazy sega ..) + The "normal" one (the one that exists on scsp) , one to emulate the 68k intc , and , + of course , the arm7 one + + The output of the sci* bits is input to the e68k , and the output of e68k is inputed into the FIQ + pin on arm7 +*/ + +static void update_e68k(Arm7Context* ctx) +{ + if (!ctx->e68k_out && ctx->aica_interr) + { + //Set the pending signal + //Is L register held here too ? + ctx->e68k_out = true; + ctx->e68k_reg_L = ctx->aica_reg_L; + + ctx->backend->UpdateInterrupts(); + } +} + +static void e68k_AcceptInterrupt(Arm7Context* ctx) +{ + ctx->e68k_out = false; + update_e68k(ctx); + +} + +//Reg reads from arm side .. 
+template +static T arm_ReadReg(Arm7Context* ctx, u32 addr) +{ + addr &= 0x7FFF; + if (addr == REG_L) + return ctx->e68k_reg_L; + else if (addr == REG_M) + return ctx->e68k_reg_M; //shouldn't really happen + else + return ctx->aica->ReadReg(addr, sz); +} + +template +static void arm_WriteReg(Arm7Context* ctx, u32 addr, T data) +{ + addr &= 0x7FFF; + if (addr == REG_L) + { + return; // Shouldn't really happen (read only) + } + else if (addr == REG_M) + { + //accept interrupts + if (data & 1) + e68k_AcceptInterrupt(ctx); + } + else + { + return ctx->aica->WriteReg(addr, data, sz); + } +} + +//00000000~007FFFFF @DRAM_AREA* +//00800000~008027FF @CHANNEL_DATA +//00802800~00802FFF @COMMON_DATA +//00803000~00807FFF @DSP_DATA + + +template +static T DYNACALL scpu_ReadMemArm(u32 addr, Arm7Context* ctx) +{ + T rv; + + addr &= 0x00FFFFFF; + if (addr < 0x800000) + { + rv = *(T*)&ctx->aica_ram[addr & (ctx->aram_mask - (sz - 1))]; + } + else + { + rv = arm_ReadReg(ctx, addr); + } + + if (unlikely(sz == 4 && addr & 3)) + { + u32 sf = (addr & 3) * 8; + return (rv >> sf) | (rv << (32 - sf)); + } + else + return rv; +} + +template +static void DYNACALL scpu_WriteMemArm(u32 addr, T data, Arm7Context* ctx) +{ + addr &= 0x00FFFFFF; + if (addr < 0x800000) + { + *(T*)&ctx->aica_ram[addr & (ctx->aram_mask - (sz - 1))] = data; + } + else + { + arm_WriteReg(ctx, addr, data); + } +} struct SoundCPU_impl : SoundCPU { + Arm7Context ctx; + unique_ptr arm; + + SoundCPU_impl(AICA* aica, u8* aica_ram, u32 aram_size) { + ctx.aica_ram = aica_ram; + ctx.aram_mask = aram_size - 1; + ctx.aica = aica; + + ctx.read8 = &scpu_ReadMemArm<1, u8>; + ctx.read32 = &scpu_ReadMemArm<4, u32>; + + ctx.write8 = &scpu_WriteMemArm<1, u8>; + ctx.write32 = &scpu_WriteMemArm<4, u32>; + + setBackend(ARM7BE_INTERPRETER); + } //called when plugin is used by emu (you should do first time init here) - s32 Init() + bool Init() { - arm_Init(); - - return rv_ok; + return true; } //called when plugin is unloaded by emu, only if 
dcInit is called (eg, not called to enumerate plugins) @@ -24,22 +157,127 @@ struct SoundCPU_impl : SoundCPU { //It's supposed to reset anything void Reset(bool Manual) { - arm_Reset(); - arm_SetEnabled(false); + ctx.enabled = false; + // clean registers + memset(&ctx.regs[0], 0, sizeof(ctx.regs)); + + ctx.armMode = 0x13; + + ctx.regs[13].I = 0x03007F00; + ctx.regs[15].I = 0x0000000; + ctx.regs[16].I = 0x00000000; + ctx.regs[R13_IRQ].I = 0x03007FA0; + ctx.regs[R13_SVC].I = 0x03007FE0; + ctx.armIrqEnable = true; + ctx.armFiqEnable = false; + + //armState = true; + ctx.C_FLAG = ctx.V_FLAG = ctx.N_FLAG = ctx.Z_FLAG = false; + + // disable FIQ + ctx.regs[16].I |= 0x40; + + ARM7Backend::CPUUpdateCPSR(&ctx); + + ctx.regs[R15_ARM_NEXT].I = ctx.regs[15].I; + ctx.regs[15].I += 4; + + arm->UpdateInterrupts(); + arm->InvalidateJitCache(); } void SetResetState(u32 state) { - arm_SetEnabled(state == 0); + bool enabled = state == 0; + + if (!ctx.enabled && enabled) + Reset(false); + + ctx.enabled = enabled; } //Mainloop void Update(u32 Cycles) { - arm_Run(Cycles / arm_sh4_bias); + if (ctx.enabled) { + for (int i = 0; i < 32; i++) + { + arm->Run(Cycles / 32 / arm_sh4_bias); + libAICA_TimeStep(); + } + } + } + + bool setBackend(Arm7Backends backend) { + + if (backend == ARM7BE_INTERPRETER) { + arm.reset(ARM7Backend::CreateInterpreter(&ctx)); + ctx.backend = arm.get(); + + return true; + } +#if FEAT_AREC != DYNAREC_NONE + else if (backend == ARM7BE_DYNAREC) { + arm.reset(ARM7Backend::CreateJit(&ctx)); + ctx.backend = arm.get(); + + return true; + } +#endif + + return false; + } + + void InterruptChange(u32 bits, u32 L) + { + ctx.aica_interr = bits != 0; + if (ctx.aica_interr) + ctx.aica_reg_L = L; + update_e68k(&ctx); + } + + void InvalidateJitCache() { + arm->InvalidateJitCache(); + } + + void serialize(void** data, unsigned int* total_size) + { + REICAST_S(ctx.aica_interr); + REICAST_S(ctx.aica_reg_L); + REICAST_S(ctx.e68k_out); + REICAST_S(ctx.e68k_reg_L); + 
REICAST_S(ctx.e68k_reg_M); + + REICAST_SA(ctx.regs, RN_ARM_REG_COUNT); + REICAST_S(ctx.armIrqEnable); + REICAST_S(ctx.armFiqEnable); + REICAST_S(ctx.armMode); + REICAST_S(ctx.enabled); + } + + void unserialize(void** data, unsigned int* total_size) + { + REICAST_US(ctx.aica_interr); + REICAST_US(ctx.aica_reg_L); + REICAST_US(ctx.e68k_out); + REICAST_US(ctx.e68k_reg_L); + REICAST_US(ctx.e68k_reg_M); + + REICAST_USA(ctx.regs, RN_ARM_REG_COUNT); + REICAST_US(ctx.armIrqEnable); + REICAST_US(ctx.armFiqEnable); + REICAST_US(ctx.armMode); + REICAST_US(ctx.enabled); } }; -SoundCPU* SoundCPU::Create() { - return new SoundCPU_impl(); +SoundCPU* SoundCPU::Create(AICA* aica, u8* aica_ram, u32 aram_size) { + return new SoundCPU_impl(aica, aica_ram, aram_size); +} + +void libARM_SetResetState(bool Reset) { + sh4_cpu->GetA0H(A0H_SCPU)->SetResetState(Reset); +} +void libARM_InterruptChange(u32 bits, u32 L) { + sh4_cpu->GetA0H(A0H_SCPU)->InterruptChange(bits, L); } diff --git a/libswirl/hw/arm7/SoundCPU.h b/libswirl/hw/arm7/SoundCPU.h new file mode 100644 index 0000000000..218594baaf --- /dev/null +++ b/libswirl/hw/arm7/SoundCPU.h @@ -0,0 +1,23 @@ +#pragma once +#include "hw/sh4/sh4_mmio.h" + +enum Arm7Backends { + ARM7BE_INTERPRETER, + ARM7BE_DYNAREC +}; + +struct AICA; + +struct SoundCPU : MMIODevice { + + virtual bool setBackend(Arm7Backends backend) = 0; + + virtual void SetResetState(u32 State) = 0; + virtual void Update(u32 cycles) = 0; + virtual void InterruptChange(u32 bits, u32 L) = 0; + virtual void InvalidateJitCache() = 0; + + virtual ~SoundCPU() { } + + static SoundCPU* Create(AICA* aica, u8* aica_ram, u32 aram_size); +}; \ No newline at end of file diff --git a/libswirl/hw/arm7/arm-new.h b/libswirl/hw/arm7/arm-new.h index b0b427fa6d..7e76d9f7b0 100644 --- a/libswirl/hw/arm7/arm-new.h +++ b/libswirl/hw/arm7/arm-new.h @@ -17,6 +17,31 @@ // along with this program; if not, write to the Free Software Foundation, // Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
USA. +/* +table generated via: + + #include + + int main() + { + for (int i = 0; i < 256; i++) + { + int count = 0; + for (int j = 0; j < 8; j++) + if (i & (1 << j)) + count++; + + printf("%d, ", count); + } + + + return 0; + } +*/ + +static const u8 cpuBitsSet[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; + + #ifdef BKPT_SUPPORT #define CONSOLE_OUTPUT(a,b) \ @@ -1433,7 +1458,7 @@ if(cond_res) { clockTicks += 5; } break; - /* +#if 0 // not in ARM7DI case 0x00b: case 0x02b: { @@ -1725,7 +1750,8 @@ if(cond_res) { if(dest != base) reg[base].I = address; } - break;*/ + break; + case 0x01d: case 0x03d: { @@ -1878,6 +1904,7 @@ if(cond_res) { reg[base].I = address; } break; + case 0x01f: case 0x03f: { @@ -2030,6 +2057,7 @@ if(cond_res) { reg[base].I = address; } break; +#endif LOGICAL_DATA_OPCODE_WITHOUT_base(OP_EOR, OP_EOR, 0x020); LOGICAL_DATA_OPCODE_WITHOUT_base(OP_EORS, OP_EOR, 0x030); case 0x029: @@ -2078,6 +2106,7 @@ if(cond_res) { ARITHMETIC_DATA_OPCODE(OP_RSBS, OP_RSB, 0x070); ARITHMETIC_DATA_OPCODE(OP_ADD, OP_ADD, 0x080); ARITHMETIC_DATA_OPCODE(OP_ADDS, OP_ADD, 0x090); +#if 0 // only on arm7tm case 0x089: { // UMULL RdLo, RdHi, Rn, Rs @@ -2120,8 +2149,10 @@ if(cond_res) { clockTicks += 5; } break; +#endif ARITHMETIC_DATA_OPCODE(OP_ADC, OP_ADC, 
0x0a0); ARITHMETIC_DATA_OPCODE(OP_ADCS, OP_ADC, 0x0b0); +#if 0 // only on arm7tm case 0x0a9: { // UMLAL RdLo, RdHi, Rn, Rs @@ -2170,8 +2201,10 @@ if(cond_res) { clockTicks += 6; } break; +#endif ARITHMETIC_DATA_OPCODE(OP_SBC, OP_SBC, 0x0c0); ARITHMETIC_DATA_OPCODE(OP_SBCS, OP_SBC, 0x0d0); +#if 0 case 0x0c9: { // SMULL RdLo, RdHi, Rm, Rs @@ -2220,8 +2253,10 @@ if(cond_res) { clockTicks += 5; } break; +#endif ARITHMETIC_DATA_OPCODE(OP_RSC, OP_RSC, 0x0e0); ARITHMETIC_DATA_OPCODE(OP_RSCS, OP_RSC, 0x0f0); +#if 0 case 0x0e9: { // SMLAL RdLo, RdHi, Rm, Rs @@ -2276,6 +2311,7 @@ if(cond_res) { clockTicks += 6; } break; +#endif LOGICAL_DATA_OPCODE(OP_TST, OP_TST, 0x110); case 0x100: // MRS Rd, CPSR diff --git a/libswirl/hw/arm7/arm64.cpp b/libswirl/hw/arm7/arm64.cpp deleted file mode 100644 index 5d68f45412..0000000000 --- a/libswirl/hw/arm7/arm64.cpp +++ /dev/null @@ -1,530 +0,0 @@ -/* - Copyright 2019 flyinghead - - This file is part of reicast. - - reicast is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - reicast is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with reicast. If not, see . 
- */ - -#include "build.h" - -#if HOST_CPU == CPU_ARM64 && FEAT_AREC != DYNAREC_NONE - -#include -#include "arm7.h" -#include "jit/emitter/arm32/arm_coding.h" -#include "deps/vixl/aarch64/macro-assembler-aarch64.h" -using namespace vixl::aarch64; -//#include "deps/vixl/aarch32/disasm-aarch32.h" - -extern void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end); -extern u32 arm_single_op(u32 opcode); -extern "C" void arm_dispatch(); -extern "C" void arm_exit(); - -extern u8* icPtr; -extern u8* ICache; -extern const u32 ICacheSize; -extern reg_pair arm_Reg[RN_ARM_REG_COUNT]; - -MacroAssembler *assembler; - -extern "C" void armFlushICache(void *bgn, void *end) { - vmem_platform_flush_cache(bgn, end, bgn, end); -} - -static MemOperand arm_reg_operand(u32 regn) -{ - return MemOperand(x28, (u8*)&arm_Reg[regn].I - (u8*)&arm_Reg[0].I); -} - -//helpers ... -void LoadReg(ARM::eReg rd, u32 regn, ARM::ConditionCode cc = ARM::CC_AL) -{ - assembler->Ldr(Register::GetWRegFromCode(rd), arm_reg_operand(regn)); -} -void StoreReg(ARM::eReg rd, u32 regn, ARM::ConditionCode cc = ARM::CC_AL) -{ - assembler->Str(Register::GetWRegFromCode(rd), arm_reg_operand(regn)); -} - -void *armv_start_conditional(ARM::ConditionCode cc) -{ - if (cc == ARM::CC_AL) - return NULL; - Label *label = new Label(); - verify(cc <= ARM::CC_LE); - Condition condition = (Condition)((u32)cc ^ 1); - assembler->B(label, condition); - - return label; -} - -void armv_end_conditional(void *ref) -{ - if (ref != NULL) - { - Label *label = (Label *)ref; - assembler->Bind(label); - delete label; - } -} - -//For COND -void LoadFlags() -{ - //Load flags - LoadReg(ARM::r0, RN_PSR_FLAGS); - //move them to flags register - assembler->Msr(NZCV, x0); -} - -void StoreFlags() -{ - //get results from flags register - assembler->Mrs(x1, NZCV); - //Store flags - StoreReg(ARM::r1, RN_PSR_FLAGS); -} - -void armv_imm_to_reg(u32 regn, u32 imm) -{ - assembler->Mov(w0, imm); - 
assembler->Str(w0, arm_reg_operand(regn)); -} - -void armv_call(void* loc) -{ - ptrdiff_t offset = reinterpret_cast(loc) - assembler->GetBuffer()->GetStartAddress(); - Label function_label; - assembler->BindToOffset(&function_label, offset); - assembler->Bl(&function_label); -} - -void armv_setup() -{ - assembler = new MacroAssembler(icPtr, ICache + ICacheSize - icPtr); -} - -void armv_intpr(u32 opcd) -{ - //Call interpreter - assembler->Mov(w0, opcd); - armv_call((void*)&arm_single_op); -} - -void armv_end(void* codestart, u32 cycl) -{ - //Normal block end - //cycle counter rv - - //pop registers & return - assembler->Subs(w27, w27, cycl); - ptrdiff_t offset = reinterpret_cast(arm_exit) - assembler->GetBuffer()->GetStartAddress(); - Label arm_exit_label; - assembler->BindToOffset(&arm_exit_label, offset); - assembler->B(&arm_exit_label, mi); //statically predicted as not taken - - offset = reinterpret_cast(arm_dispatch) - assembler->GetBuffer()->GetStartAddress(); - Label arm_dispatch_label; - assembler->BindToOffset(&arm_dispatch_label, offset); - assembler->B(&arm_dispatch_label); - - assembler->FinalizeCode(); - verify(assembler->GetBuffer()->GetCursorOffset() <= assembler->GetBuffer()->GetCapacity()); - vmem_platform_flush_cache( - codestart, assembler->GetBuffer()->GetEndAddress(), - codestart, assembler->GetBuffer()->GetEndAddress()); - icPtr += assembler->GetBuffer()->GetSizeInBytes(); - -#if 0 - Instruction* instr_start = (Instruction *)codestart; - Instruction* instr_end = assembler->GetBuffer()->GetEndAddress(); - Decoder decoder; - Disassembler disasm; - decoder.AppendVisitor(&disasm); - Instruction* instr; - for (instr = instr_start; instr < instr_end; instr += kInstructionSize) { - decoder.Decode(instr); - printf("arm64 arec\t %p:\t%s\n", - reinterpret_cast(instr), - disasm.GetOutput()); - } -#endif - delete assembler; - assembler = NULL; -} - -//Hook cus varm misses this, so x86 needs special code -void armv_MOV32(ARM::eReg regn, u32 imm) -{ - 
assembler->Mov(Register::GetWRegFromCode(regn), imm); -} - -void armv_mov(ARM::eReg regd, ARM::eReg regn) -{ - assembler->Mov(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn)); -} - -void armv_add(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) -{ - assembler->Add(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), Register::GetWRegFromCode(regm)); -} - -void armv_sub(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) -{ - assembler->Sub(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), Register::GetWRegFromCode(regm)); -} - -void armv_add(ARM::eReg regd, ARM::eReg regn, s32 imm) -{ - assembler->Add(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), imm); -} - -void armv_lsl(ARM::eReg regd, ARM::eReg regn, u32 imm) -{ - assembler->Lsl(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), imm); -} - -void armv_bic(ARM::eReg regd, ARM::eReg regn, u32 imm) -{ - assembler->Bic(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), imm); -} - -class android_buf : public std::stringbuf -{ -public: - virtual int sync() override { - printf("ARM7: %s\n", this->str().c_str()); - str(""); - - return 0; - } -}; - -void armEmit32(u32 opcode) -{ -#if 0 - if (opcode != 0x00011001) - { - android_buf buffer; - std::ostream cout(&buffer); - vixl::aarch32::PrintDisassembler disasm(cout, 0); - disasm.DecodeA32(opcode); - cout.flush(); - } -#endif - - const Register& rd = Register::GetWRegFromCode((opcode >> 12) & 15); - const Register& rn = Register::GetWRegFromCode((opcode >> 16) & 15); - bool set_flags = opcode & (1 << 20); - Operand op2; - int op_type = (opcode >> 21) & 15; - bool logical_op = op_type == 0 || op_type == 1 || op_type == 8 || op_type == 9 // AND, EOR, TST, TEQ - || op_type == 12 || op_type == 13 || op_type == 15 || op_type == 14; // ORR, MOV, MVN, BIC - bool set_carry_bit = false; - - ARM::ConditionCode condition = (ARM::ConditionCode)(opcode >> 28); - void *cond_op_label = 
armv_start_conditional(condition); - - if (opcode & (1 << 25)) - { - // op2 is imm8r4 - u32 rotate = ((opcode >> 8) & 15) << 1; - u32 imm8 = opcode & 0xff; - op2 = Operand((imm8 >> rotate) | (imm8 << (32 - rotate))); - } - else - { - // op2 is register - const Register& rm = Register::GetWRegFromCode(opcode & 15); - - Shift shift = (Shift)((opcode >> 5) & 3); - - if (opcode & (1 << 4)) - { - // shift by register - // FIXME Carry must be set based on shift/rotate - //if (set_flags && logical_op) - // die("shift by register with set flags C - not implemented"); - const Register& shift_reg = Register::GetWRegFromCode((opcode >> 8) & 15); - - Label shift_by_32_label; - - switch (shift) - { - case LSL: - case LSR: - assembler->Mrs(x0, NZCV); - assembler->Cmp(shift_reg, 32); - if (shift == LSL) - assembler->Lsl(w15, rm, shift_reg); - else - assembler->Lsr(w15, rm, shift_reg); - assembler->Csel(w15, 0, w15, ge); // LSL and LSR by 32 or more gives 0 - assembler->Msr(NZCV, x0); - break; - case ASR: - assembler->Mrs(x0, NZCV); - assembler->Cmp(shift_reg, 32); - assembler->Asr(w15, rm, shift_reg); - assembler->Sbfx(w13, rm, 31, 1); - assembler->Csel(w15, w13, w15, ge); // ASR by 32 or more gives 0 or -1 depending on operand sign - assembler->Msr(NZCV, x0); - break; - case ROR: - assembler->Ror(w15, rm, shift_reg); - break; - default: - die("Invalid shift"); - break; - } - op2 = Operand(w15); - } - else - { - // shift by immediate - u32 shift_imm = (opcode >> 7) & 0x1f; - if (shift != ROR && shift_imm != 0 && !(set_flags && logical_op)) - { - op2 = Operand(rm, shift, shift_imm); - } - else if (shift_imm == 0) - { - if (shift == LSL) - { - op2 = Operand(rm); // LSL 0 is a no-op - } - else - { - // Shift by 32 - if (set_flags && logical_op) - set_carry_bit = true; - if (shift == LSR) - { - if (set_flags && logical_op) - assembler->Ubfx(w14, rm, 31, 1); // w14 = rm[31] - assembler->Mov(w15, 0); // w15 = 0 - } - else if (shift == ASR) - { - if (set_flags && logical_op) - 
assembler->Ubfx(w14, rm, 31, 1); // w14 = rm[31] - assembler->Sbfx(w15, rm, 31, 1); // w15 = rm < 0 ? -1 : 0 - } - else if (shift == ROR) - { - // RRX - assembler->Cset(w14, cs); // w14 = C - assembler->Mov(w15, Operand(rm, LSR, 1)); // w15 = rm >> 1 - assembler->Bfi(w15, w14, 31, 1); // w15[31] = C - if (set_flags && logical_op) - assembler->Ubfx(w14, rm, 0, 1); // w14 = rm[0] (new C) - } - else - die("Invalid shift"); - op2 = Operand(w15); - } - } - else - { - // Carry must be preserved or Ror shift - if (set_flags && logical_op) - set_carry_bit = true; - if (shift == LSL) - { - assembler->Ubfx(w14, rm, 32 - shift_imm, 1); // w14 = rm[lsb] - assembler->Lsl(w15, rm, shift_imm); // w15 <<= shift - } - else - { - if (set_flags && logical_op) - assembler->Ubfx(w14, rm, shift_imm - 1, 1); // w14 = rm[msb] - - if (shift == LSR) - assembler->Lsr(w15, rm, shift_imm); // w15 >>= shift - else if (shift == ASR) - assembler->Asr(w15, rm, shift_imm); - else if (shift == ROR) - assembler->Ror(w15, rm, shift_imm); - else - die("Invalid shift"); - } - op2 = Operand(w15); - } - } - } - if (!set_carry_bit - && (op_type == 8 || op_type == 9 // TST and TEQ always set flags - || (logical_op && set_flags))) - { - // Logical ops should only affect the carry bit based on the op2 shift - // Here we're not shifting so the carry bit should be preserved - set_carry_bit = true; - assembler->Cset(w14, cs); - } - - switch (op_type) - { - case 0: // AND - if (set_flags) - assembler->Ands(rd, rn, op2); - else - assembler->And(rd, rn, op2); - break; - case 1: // EOR - assembler->Eor(rd, rn, op2); - if (set_flags) - assembler->Tst(rd, rd); - break; - case 2: // SUB - if (set_flags) - assembler->Subs(rd, rn, op2); - else - assembler->Sub(rd, rn, op2); - break; - case 3: // RSB - assembler->Neg(w0, rn); - if (set_flags) - assembler->Adds(rd, w0, op2); - else - assembler->Add(rd, w0, op2); - break; - case 4: // ADD - if (set_flags) - assembler->Adds(rd, rn, op2); - else - assembler->Add(rd, rn, op2); 
- break; - case 12: // ORR - assembler->Orr(rd, rn, op2); - if (set_flags) - assembler->Tst(rd, rd); - break; - case 14: // BIC - if (set_flags) - assembler->Bics(rd, rn, op2); - else - assembler->Bic(rd, rn, op2); - break; - case 5: // ADC - if (set_flags) - assembler->Adcs(rd, rn, op2); - else - assembler->Adc(rd, rn, op2); - break; - case 6: // SBC - if (set_flags) - assembler->Sbcs(rd, rn, op2); - else - assembler->Sbc(rd, rn, op2); - break; - case 7: // RSC - assembler->Ngc(w0, rn); - if (set_flags) - assembler->Adds(rd, w0, op2); - else - assembler->Add(rd, w0, op2); - break; - case 8: // TST - assembler->Tst(rn, op2); - break; - case 9: // TEQ - assembler->Eor(w0, rn, op2); - assembler->Tst(w0, w0); - break; - case 10: // CMP - assembler->Cmp(rn, op2); - break; - case 11: // CMN - assembler->Cmn(rn, op2); - break; - case 13: // MOV - assembler->Mov(rd, op2); - if (set_flags) - assembler->Tst(rd, rd); - break; - case 15: // MVN - assembler->Mvn(rd, op2); - if (set_flags) - assembler->Tst(rd, rd); - break; - } - if (set_carry_bit) - { - assembler->Mrs(x0, NZCV); - assembler->Bfi(x0, x14, 29, 1); // C is bit 29 in NZCV - assembler->Msr(NZCV, x0); - } - armv_end_conditional(cond_op_label); -} - -// -// Dynarec main loop -// -// w25 is used for temp mem save (post increment op2) -// x26 is the entry points table -// w27 is the cycle counter -// x28 points to the arm7 registers base -__asm__ ( - ".globl arm_compilecode \n\t" - ".hidden arm_compilecode \n" - "arm_compilecode: \n\t" - "bl CompileCode \n\t" - "b arm_dispatch \n\t" - - ".globl arm_mainloop \n\t" - ".hidden arm_mainloop \n" - "arm_mainloop: \n\t" // arm_mainloop(cycles, regs, entry points) - "stp x25, x26, [sp, #-48]! 
\n\t" - "stp x27, x28, [sp, #16] \n\t" - "stp x29, x30, [sp, #32] \n\t" - - "mov x28, x1 \n\t" // arm7 registers - "mov x26, x2 \n\t" // lookup base - - "ldr w27, [x28, #192] \n\t" // cycle count - "add w27, w27, w0 \n" // add cycles for this timeslice - - ".globl arm_dispatch \n\t" - ".hidden arm_dispatch \n" - "arm_dispatch: \n\t" - "ldp w0, w1, [x28, #184] \n\t" // load Next PC, interrupt -#if ARAM_SIZE == 2*1024*1024 - "ubfx w2, w0, #2, #19 \n\t" // w2 = pc >> 2. Note: assuming address space == 2 MB (21 bits) -#elif ARAM_SIZE == 8*1024*1024 - "ubfx w2, w0, #2, #21 \n\t" // w2 = pc >> 2. Note: assuming address space == 8 MB (23 bits) -#else -#error Unsupported AICA RAM size -#endif - "cbnz w1, arm_dofiq \n\t" // if interrupt pending, handle it - - "add x2, x26, x2, lsl #3 \n\t" // x2 = EntryPoints + pc << 1 - "ldr x3, [x2] \n\t" - "br x3 \n" - - "arm_dofiq: \n\t" - "bl CPUFiq \n\t" - "b arm_dispatch \n\t" - - ".globl arm_exit \n\t" - ".hidden arm_exit \n" - "arm_exit: \n\t" - "str w27, [x28, #192] \n\t" // if timeslice is over, save remaining cycles - "ldp x29, x30, [sp, #32] \n\t" - "ldp x27, x28, [sp, #16] \n\t" - "ldp x25, x26, [sp], #48 \n\t" - "ret \n" -); -#endif // ARM64 diff --git a/libswirl/hw/arm7/arm7.cpp b/libswirl/hw/arm7/arm7.cpp index e58d3aa757..280235d6f5 100644 --- a/libswirl/hw/arm7/arm7.cpp +++ b/libswirl/hw/arm7/arm7.cpp @@ -1,31 +1,22 @@ #include "arm7.h" #include "arm_mem.h" -#include "virt_arm.h" - -#include - - -#define C_CORE +#include "arm7_context.h" +#include "hw/sh4/sh4_core.h" +#include "hw/aica/aica_mmio.h" -#if 0 - #define arm_printf printf -#else - void arm_printf(...) 
{ } -#endif //#define CPUReadHalfWordQuick(addr) arm_ReadMem16(addr & 0x7FFFFF) -#define CPUReadMemoryQuick(addr) (*(u32*)&aica_ram[addr&ARAM_MASK]) -#define CPUReadByte arm_ReadMem8 -#define CPUReadMemory arm_ReadMem32 -#define CPUReadHalfWord arm_ReadMem16 -#define CPUReadHalfWordSigned(addr) ((s16)arm_ReadMem16(addr)) +#define CPUReadMemoryQuick(addr) (*(u32*)&ctx->aica_ram[addr&ctx->aram_mask]) +#define CPUReadByte(a) arm_ReadMem8(a, ctx) +#define CPUReadMemory(a) arm_ReadMem32(a, ctx) -#define CPUWriteMemory arm_WriteMem32 -#define CPUWriteHalfWord arm_WriteMem16 -#define CPUWriteByte arm_WriteMem8 +#define CPUWriteMemory(a, d) arm_WriteMem32(a, d, ctx) +#define CPUWriteByte(a, d) arm_WriteMem8(a, d, ctx) - -#define reg arm_Reg +#define armMode ctx->armMode +#define armIrqEnable ctx->armIrqEnable +#define armFiqEnable ctx->armFiqEnable +#define reg ctx->regs #define armNextPC reg[R15_ARM_NEXT].I @@ -40,9 +31,7 @@ //bool arm_FiqPending; -- not used , i use the input directly :) //bool arm_IrqPending; -DECL_ALIGN(8) reg_pair arm_Reg[RN_ARM_REG_COUNT]; - -void CPUSwap(u32 *a, u32 *b) +void CPUSwap(u32* a, u32* b) { u32 c = *b; *b = *a; @@ -60,76 +49,41 @@ bool V_FLAG; #define C_FLAG (reg[RN_PSR_FLAGS].FLG.C) #define V_FLAG (reg[RN_PSR_FLAGS].FLG.V) -bool armIrqEnable; -bool armFiqEnable; -//bool armState; -int armMode; - -bool Arm7Enabled=false; - -u8 cpuBitsSet[256]; -bool intState = false; -bool stopState = false; -bool holdState = false; +#define arm_ReadMem8 ctx->read8 +#define arm_ReadMem32 ctx->read32 +#define arm_WriteMem8 ctx->write8 +#define arm_WriteMem32 ctx->write32 - -void CPUSwitchMode(int mode, bool saveState, bool breakLoop=true); -extern "C" void CPUFiq(); -void CPUUpdateCPSR(); -void CPUUpdateFlags(); -void CPUSoftwareInterrupt(int comment); -void CPUUndefinedException(); - -#if FEAT_AREC == DYNAREC_NONE - -void arm_Run_(u32 CycleCount) +void DYNACALL ARM7Backend::UpdateInterrupts(Arm7Context* ctx) { - if (!Arm7Enabled) - return; - - u32 
clockTicks=0; - while (clockTickse68k_out && armFiqEnable; } -#endif - -void armt_init(); -//void CreateTables(); -void arm_Init() +void DYNACALL ARM7Backend::CPUUpdateFlags(Arm7Context* ctx) { -#if FEAT_AREC != DYNAREC_NONE - armt_init(); -#endif - //CreateTables(); - arm_Reset(); + u32 CPSR = reg[16].I; - for (int i = 0; i < 256; i++) - { - int count = 0; - for (int j = 0; j < 8; j++) - if (i & (1 << j)) - count++; + reg[RN_PSR_FLAGS].FLG.NZCV = reg[16].PSR.NZCV; - cpuBitsSet[i] = count; - } + /* + N_FLAG = (CPSR & 0x80000000) ? true: false; + Z_FLAG = (CPSR & 0x40000000) ? true: false; + C_FLAG = (CPSR & 0x20000000) ? true: false; + V_FLAG = (CPSR & 0x10000000) ? true: false; + */ + //armState = (CPSR & 0x20) ? false : true; + armIrqEnable = (CPSR & 0x80) ? false : true; + armFiqEnable = (CPSR & 0x40) ? false : true; + UpdateInterrupts(ctx); } -void CPUSwitchMode(int mode, bool saveState, bool breakLoop) +void DYNACALL ARM7Backend::CPUSwitchMode(Arm7Context* ctx, int mode, bool saveState) { - CPUUpdateCPSR(); + CPUUpdateCPSR(ctx); - switch(armMode) + switch (armMode) { case 0x10: case 0x1F: @@ -148,31 +102,31 @@ void CPUSwitchMode(int mode, bool saveState, bool breakLoop) reg[SPSR_FIQ].I = reg[17].I; break; case 0x12: - reg[R13_IRQ].I = reg[13].I; - reg[R14_IRQ].I = reg[14].I; - reg[SPSR_IRQ].I = reg[17].I; + reg[R13_IRQ].I = reg[13].I; + reg[R14_IRQ].I = reg[14].I; + reg[SPSR_IRQ].I = reg[17].I; break; case 0x13: - reg[R13_SVC].I = reg[13].I; - reg[R14_SVC].I = reg[14].I; - reg[SPSR_SVC].I = reg[17].I; + reg[R13_SVC].I = reg[13].I; + reg[R14_SVC].I = reg[14].I; + reg[SPSR_SVC].I = reg[17].I; break; case 0x17: - reg[R13_ABT].I = reg[13].I; - reg[R14_ABT].I = reg[14].I; - reg[SPSR_ABT].I = reg[17].I; + reg[R13_ABT].I = reg[13].I; + reg[R14_ABT].I = reg[14].I; + reg[SPSR_ABT].I = reg[17].I; break; case 0x1b: - reg[R13_UND].I = reg[13].I; - reg[R14_UND].I = reg[14].I; - reg[SPSR_UND].I = reg[17].I; + reg[R13_UND].I = reg[13].I; + reg[R14_UND].I = reg[14].I; + 
reg[SPSR_UND].I = reg[17].I; break; } u32 CPSR = reg[16].I; u32 SPSR = reg[17].I; - switch(mode) + switch (mode) { case 0x10: case 0x1F: @@ -188,7 +142,7 @@ void CPUSwitchMode(int mode, bool saveState, bool breakLoop) CPUSwap(®[12].I, ®[R12_FIQ].I); reg[13].I = reg[R13_FIQ].I; reg[14].I = reg[R14_FIQ].I; - if(saveState) + if (saveState) reg[17].I = CPSR; else reg[17].I = reg[SPSR_FIQ].I; @@ -197,7 +151,7 @@ void CPUSwitchMode(int mode, bool saveState, bool breakLoop) reg[13].I = reg[R13_IRQ].I; reg[14].I = reg[R14_IRQ].I; reg[16].I = SPSR; - if(saveState) + if (saveState) reg[17].I = CPSR; else reg[17].I = reg[SPSR_IRQ].I; @@ -206,7 +160,7 @@ void CPUSwitchMode(int mode, bool saveState, bool breakLoop) reg[13].I = reg[R13_SVC].I; reg[14].I = reg[R14_SVC].I; reg[16].I = SPSR; - if(saveState) + if (saveState) reg[17].I = CPSR; else reg[17].I = reg[SPSR_SVC].I; @@ -215,7 +169,7 @@ void CPUSwitchMode(int mode, bool saveState, bool breakLoop) reg[13].I = reg[R13_ABT].I; reg[14].I = reg[R14_ABT].I; reg[16].I = SPSR; - if(saveState) + if (saveState) reg[17].I = CPSR; else reg[17].I = reg[SPSR_ABT].I; @@ -224,7 +178,7 @@ void CPUSwitchMode(int mode, bool saveState, bool breakLoop) reg[13].I = reg[R13_UND].I; reg[14].I = reg[R14_UND].I; reg[16].I = SPSR; - if(saveState) + if (saveState) reg[17].I = CPSR; else reg[17].I = reg[SPSR_UND].I; @@ -235,147 +189,60 @@ void CPUSwitchMode(int mode, bool saveState, bool breakLoop) break; } armMode = mode; - CPUUpdateFlags(); - CPUUpdateCPSR(); + CPUUpdateFlags(ctx); + CPUUpdateCPSR(ctx); } -void CPUUpdateCPSR() +void DYNACALL ARM7Backend::CPUUpdateCPSR(Arm7Context* ctx) { - reg_pair CPSR; + arm7_reg CPSR; - CPSR.I = reg[RN_CPSR].I & 0x40; + CPSR.I = ctx->regs[RN_CPSR].I & 0x40; - /* - if(N_FLAG) - CPSR |= 0x80000000; - if(Z_FLAG) - CPSR |= 0x40000000; - if(C_FLAG) - CPSR |= 0x20000000; - if(V_FLAG) - CPSR |= 0x10000000; - if(!armState) - CPSR |= 0x00000020; - */ - - CPSR.PSR.NZCV=reg[RN_PSR_FLAGS].FLG.NZCV; + CPSR.PSR.NZCV = 
ctx->regs[RN_PSR_FLAGS].FLG.NZCV; if (!armFiqEnable) CPSR.I |= 0x40; - if(!armIrqEnable) + if (!armIrqEnable) CPSR.I |= 0x80; - CPSR.PSR.M=armMode; - - reg[16].I = CPSR.I; -} - -void CPUUpdateFlags() -{ - u32 CPSR = reg[16].I; - - reg[RN_PSR_FLAGS].FLG.NZCV=reg[16].PSR.NZCV; + CPSR.PSR.M = armMode; - /* - N_FLAG = (CPSR & 0x80000000) ? true: false; - Z_FLAG = (CPSR & 0x40000000) ? true: false; - C_FLAG = (CPSR & 0x20000000) ? true: false; - V_FLAG = (CPSR & 0x10000000) ? true: false; - */ - //armState = (CPSR & 0x20) ? false : true; - armIrqEnable = (CPSR & 0x80) ? false : true; - armFiqEnable = (CPSR & 0x40) ? false : true; - update_armintc(); + ctx->regs[16].I = CPSR.I; } -void CPUSoftwareInterrupt(int comment) + +static void CPUSoftwareInterrupt(Arm7Context* ctx, int comment) { - u32 PC = reg[R15_ARM_NEXT].I+4; + u32 PC = reg[R15_ARM_NEXT].I + 4; //bool savedArmState = armState; - CPUSwitchMode(0x13, true, false); + ARM7Backend::CPUSwitchMode(ctx, 0x13, true); reg[14].I = PC; -// reg[15].I = 0x08; - + // reg[15].I = 0x08; + armIrqEnable = false; armNextPC = 0x08; -// reg[15].I += 4; + // reg[15].I += 4; } -void CPUUndefinedException() +static void CPUUndefinedException(Arm7Context* ctx) { printf("arm7: CPUUndefinedException(). 
SOMETHING WENT WRONG\n"); - u32 PC = reg[R15_ARM_NEXT].I+4; - CPUSwitchMode(0x1b, true, false); + u32 PC = reg[R15_ARM_NEXT].I + 4; + ARM7Backend::CPUSwitchMode(ctx, 0x1b, true); reg[14].I = PC; -// reg[15].I = 0x04; + // reg[15].I = 0x04; armIrqEnable = false; armNextPC = 0x04; -// reg[15].I += 4; -} - -void FlushCache(); - -void arm_Reset() -{ -#if FEAT_AREC != DYNAREC_NONE - FlushCache(); -#endif - Arm7Enabled = false; - // clean registers - memset(&arm_Reg[0], 0, sizeof(arm_Reg)); - - armMode = 0x1F; - - reg[13].I = 0x03007F00; - reg[15].I = 0x0000000; - reg[16].I = 0x00000000; - reg[R13_IRQ].I = 0x03007FA0; - reg[R13_SVC].I = 0x03007FE0; - armIrqEnable = true; - armFiqEnable = false; - update_armintc(); - - //armState = true; - C_FLAG = V_FLAG = N_FLAG = Z_FLAG = false; - - // disable FIQ - reg[16].I |= 0x40; - - CPUUpdateCPSR(); - - armNextPC = reg[15].I; - reg[15].I += 4; -} - -/* - -//NO IRQ on aica .. -void CPUInterrupt() -{ - u32 PC = reg[15].I; - //bool savedState = armState; - CPUSwitchMode(0x12, true, false); - reg[14].I = PC; - //if(!savedState) - // reg[14].I += 2; - reg[15].I = 0x18; - //armState = true; - armIrqEnable = false; - - armNextPC = reg[15].I; - reg[15].I += 4; + // reg[15].I += 4; } -*/ - -extern "C" -NOINLINE -void CPUFiq() +void DYNACALL ARM7Backend::CPUFiq(Arm7Context* ctx) { - u32 PC = reg[R15_ARM_NEXT].I+4; + u32 PC = reg[R15_ARM_NEXT].I + 4; //bool savedState = armState; - CPUSwitchMode(0x11, true, false); + ARM7Backend::CPUSwitchMode(ctx, 0x11, true); reg[14].I = PC; //if(!savedState) // reg[14].I += 2; @@ -383,1835 +250,52 @@ void CPUFiq() //armState = true; armIrqEnable = false; armFiqEnable = false; - update_armintc(); + ARM7Backend::UpdateInterrupts(ctx); armNextPC = 0x1c; //reg[15].I += 4; } +#define CPUSwitchMode(mode, saveState) CPUSwitchMode(ctx, mode, saveState) +#define CPUUpdateFlags() CPUUpdateFlags(ctx) +#define CPUSoftwareInterrupt(comment) CPUSoftwareInterrupt(ctx, comment) +#define CPUUndefinedException() 
CPUUndefinedException(ctx) +#define CPUUpdateCPSR() CPUUpdateCPSR(ctx) -/* - --Seems like aica has 3 interrupt controllers actualy (damn lazy sega ..) - The "normal" one (the one that exists on scsp) , one to emulate the 68k intc , and , - of course , the arm7 one - - The output of the sci* bits is input to the e68k , and the output of e68k is inputed into the FIQ - pin on arm7 -*/ -#include "hw/sh4/sh4_core.h" - - -void arm_SetEnabled(bool enabled) -{ - if(!Arm7Enabled && enabled) - arm_Reset(); - - Arm7Enabled=enabled; -} - - - -void update_armintc() -{ - reg[INTR_PEND].I=e68k_out && armFiqEnable; -} - -void libAICA_TimeStep(); - -#if FEAT_AREC == DYNAREC_NONE -void arm_Run(u32 CycleCount) { - for (int i=0;i<32;i++) - { - arm_Run_(CycleCount/32); - libAICA_TimeStep(); - } -} -#else // FEAT_AREC != DYNAREC_NONE - -#if HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN -#include -#endif - -extern "C" void CompileCode(); - -//Emulate a single arm op, passed in opcode -//DYNACALL for ECX passing - -u32 DYNACALL arm_single_op(u32 opcode) -{ - u32 clockTicks=0; +u32 DYNACALL ARM7Backend::singleOp(Arm7Context* ctx, u32 opcode) { + u32 clockTicks = 0; #define NO_OPCODE_READ - //u32 static_opcode=((opcd_hash&0xFFF0)<<16) | ((opcd_hash&0x000F)<<4); - //u32 static_opcode=((opcd_hash)<<28); #include "arm-new.h" - return clockTicks; -} - -/* - - ARM - ALU opcodes (more or less) - - (flags,rv)=opcode(flags,in regs ..) - rd=rv; - if (set_flags) - PSR=(rd==pc?CPSR:flags); - - (mem ops) - Writes of R15: - R15+12 - R15 as base: - R15+8 - LDR - rd=mem[addr(in regs)] - LDM - - ... 
- STR/STM: pc+12 - - - /// - - "cached" interpreter: - Set PC+12 to PC reg - mov opcode - call function - - if (pc settting opcode) - lookup again using armNextPC - - - PC setting opcodes - ALU with write to PC - LDR with write to PC (SDT) - LDM with write to PC (BDT) - B/BL - SWI - - - Indirect, via write to PSR/Mode - MSR -*/ - - -struct ArmDPOP -{ - u32 key; - u32 mask; - u32 flags; -}; - -vector ops; - -enum OpFlags -{ - OP_SETS_PC = 1, - OP_READS_PC = 32768, - OP_IS_COND = 65536, - OP_MFB = 0x80000000, - - OP_HAS_RD_12 = 2, - OP_HAS_RD_16 = 4, - OP_HAS_RS_0 = 8, - OP_HAS_RS_8 = 16, - OP_HAS_RS_16 = 32, - OP_HAS_FLAGS_READ = 4096, - OP_HAS_FLAGS_WRITE = 8192, - OP_HAS_RD_READ = 16384, //For conditionals - - OP_WRITE_FLAGS = 64, - OP_WRITE_FLAGS_S = 128, - OP_READ_FLAGS = 256, - OP_READ_FLAGS_S = 512, - OP_WRITE_REG = 1024, - OP_READ_REG_1 = 2048, -}; - -#define DP_R_ROFC (OP_READ_FLAGS_S|OP_READ_REG_1) //Reads reg1, op2, flags if S -#define DP_R_ROF (OP_READ_FLAGS|OP_READ_REG_1) //Reads reg1, op2, flags (ADC & co) -#define DP_R_OFC (OP_READ_FLAGS_S) //Reads op2, flags if S - -#define DP_W_RFC (OP_WRITE_FLAGS_S|OP_WRITE_REG) //Writes reg, and flags if S -#define DP_W_F (OP_WRITE_FLAGS) //Writes only flags, always (S=1) - -/* - COND | 00 0 OP1 S Rn Rd SA ST 0 Rm -- Data opcode, PSR xfer (imm shifted reg) - | 00 0 OP1 S Rn Rd Rs 0 ST 1 Rm -- Data opcode, PSR xfer (reg shifted reg) - | 00 0 0 00A S Rd Rn Rs 1001 Rm -- Mult - | 00 0 1 0B0 0 Rn Rd 0000 1001 Rm -- SWP - | 00 1 OP1 S Rn Rd imm8r4 -- Data opcode, PSR xfer (imm8r4) - - | 01 0 P UBW L Rn Rd Offset -- LDR/STR (I=0) - | 01 1 P UBW L Rn Rd SHAM SHTP 0 Rs -- LDR/STR (I=1) - | 10 0 P USW L Rn {RList} -- LDM/STM - | 10 1 L {offset} -- B/BL - | 11 1 1 X* -- SWI - - (undef cases) - | 01 1 XXXX X X* X* X* 1 XXXX - Undefined (LDR/STR w/ encodings that would be reg. based shift) - | 11 0 PUNW L Rn {undef} -- Copr. Data xfer (undef) - | 11 1 0 CPOP Crn Crd Cpn CP3 0 Crm -- Copr. 
Data Op (undef) - | 11 1 0 CPO3 L Crn Crd Cpn CP3 1 Crm -- Copr. Reg xf (undef) - - - Phase #1: - -Non branches that don't touch memory (pretty much: Data processing, Not MSR, Mult) - -Everything else is ifb - - Phase #2: - Move LDR/STR to templates - - Phase #3: - Move LDM/STM to templates - +#undef NO_OPCODE_READ -*/ - -void AddDPOP(u32 subcd, u32 rflags, u32 wflags) -{ - ArmDPOP op; - - u32 key=subcd<<21; - u32 mask=(15<<21) | (7<<25); - - op.flags=rflags|wflags; - - if (wflags==DP_W_F) - { - //also match S bit for opcodes that must write to flags (CMP & co) - mask|=1<<20; - key|=1<<20; - } - - //ISR form (bit 25=0, bit 4 = 0) - op.key=key; - op.mask=mask | (1<<4); - ops.push_back(op); - - //RSR form (bit 25=0, bit 4 = 1, bit 7=0) - op.key = key | (1<<4); - op.mask = mask | (1<<4) | (1<<7); - ops.push_back(op); - - //imm8r4 form (bit 25=1) - op.key = key | (1<<25); - op.mask = mask; - ops.push_back(op); -} - -void InitHash() -{ - /* - COND | 00 I OP1 S Rn Rd OPER2 -- Data opcode, PSR xfer - Data processing opcodes - */ - - //AND 0000 Rn, OPER2, {Flags} Rd, {Flags} - //EOR 0001 Rn, OPER2, {Flags} Rd, {Flags} - //SUB 0010 Rn, OPER2, {Flags} Rd, {Flags} - //RSB 0011 Rn, OPER2, {Flags} Rd, {Flags} - //ADD 0100 Rn, OPER2, {Flags} Rd, {Flags} - //ORR 1100 Rn, OPER2, {Flags} Rd, {Flags} - //BIC 1110 Rn, OPER2, {Flags} Rd, {Flags} - AddDPOP(0,DP_R_ROFC, DP_W_RFC); - AddDPOP(1,DP_R_ROFC, DP_W_RFC); - AddDPOP(2,DP_R_ROFC, DP_W_RFC); - AddDPOP(3,DP_R_ROFC, DP_W_RFC); - AddDPOP(4,DP_R_ROFC, DP_W_RFC); - AddDPOP(12,DP_R_ROFC, DP_W_RFC); - AddDPOP(14,DP_R_ROFC, DP_W_RFC); - - //ADC 0101 Rn, OPER2, Flags Rd, {Flags} - //SBC 0110 Rn, OPER2, Flags Rd, {Flags} - //RSC 0111 Rn, OPER2, Flags Rd, {Flags} - AddDPOP(5,DP_R_ROF, DP_W_RFC); - AddDPOP(6,DP_R_ROF, DP_W_RFC); - AddDPOP(7,DP_R_ROF, DP_W_RFC); - - //TST 1000 S=1 Rn, OPER2, Flags Flags - //TEQ 1001 S=1 Rn, OPER2, Flags Flags - AddDPOP(8,DP_R_ROF, DP_W_F); - AddDPOP(9,DP_R_ROF, DP_W_F); - - //CMP 1010 S=1 Rn, OPER2 Flags - 
//CMN 1011 S=1 Rn, OPER2 Flags - AddDPOP(10,DP_R_ROF, DP_W_F); - AddDPOP(11,DP_R_ROF, DP_W_F); - - //MOV 1101 OPER2, {Flags} Rd, {Flags} - //MVN 1111 OPER2, {Flags} Rd, {Flags} - AddDPOP(13,DP_R_OFC, DP_W_RFC); - AddDPOP(15,DP_R_OFC, DP_W_RFC); + return clockTicks; } +u32 DYNACALL ARM7Backend::Step(Arm7Context* ctx) { + u32 clockTicks = 0; - - -/* - * - * X86 Compiler - * - */ - -void armEmit32(u32 emit32); -void *armGetEmitPtr(); - - -#define _DEVEL (1) -#define EMIT_I armEmit32((I)) -#define EMIT_GET_PTR() armGetEmitPtr() -u8* icPtr; -u8* ICache; - -extern const u32 ICacheSize=1024*1024; -#if HOST_OS == OS_WINDOWS -u8 ARM7_TCB[ICacheSize+4096]; -#elif HOST_OS == OS_LINUX - -u8 ARM7_TCB[ICacheSize+4096] __attribute__((section(".text"))); - -#elif HOST_OS==OS_DARWIN -u8 ARM7_TCB[ICacheSize+4096] __attribute__((section("__TEXT, .text"))); -#else -#error ARM7_TCB ALLOC -#endif - -#include "jit/emitter/arm32/arm_emitter.h" -#undef I - - -using namespace ARM; - - -void* EntryPoints[ARAM_SIZE_MAX/4]; - -enum OpType -{ - VOT_Fallback, - VOT_DataOp, - VOT_B, - VOT_BL, - VOT_BR, //Branch (to register) - VOT_Read, //Actually, this handles LDR and STR - //VOT_LDM, //This Isn't used anymore - VOT_MRS, - VOT_MSR, -}; - - - -void armv_call(void* target); -void armv_setup(); -void armv_intpr(u32 opcd); -void armv_end(void* codestart, u32 cycles); -void armv_check_pc(u32 pc); -void armv_check_cache(u32 opcd, u32 pc); -void armv_imm_to_reg(u32 regn, u32 imm); -void armv_MOV32(eReg regn, u32 imm); -void armv_prof(OpType opt,u32 op,u32 flg); - -extern "C" void arm_dispatch(); -extern "C" void arm_exit(); -extern "C" void DYNACALL -#if BUILD_COMPILER == COMPILER_GCC - // Avoid inlining / duplicating / whatever - __attribute__ ((optimize(0))) -#endif - arm_mainloop(u32 cycl, void* regs, void* entrypoints); -extern "C" void DYNACALL arm_compilecode(); - -template -u32 DYNACALL DoMemOp(u32 addr,u32 data) -{ - u32 rv=0; - -#if HOST_CPU==CPU_X86 - addr=virt_arm_reg(0); - 
data=virt_arm_reg(1); -#endif - - if (L) - { - if (B) - rv=arm_ReadMem8(addr); - else - rv=arm_ReadMem32(addr); - } - else + if (reg[INTR_PEND].I) { - if (B) - arm_WriteMem8(addr,data); - else - arm_WriteMem32(addr,data); + CPUFiq(ctx); } - #if HOST_CPU==CPU_X86 - virt_arm_reg(0)=rv; - #endif - - return rv; -} - -//findfirstset -- used in LDM/STM handling -#if HOST_CPU==CPU_X86 && BUILD_COMPILER != COMPILER_GCC -#include - -u32 findfirstset(u32 v) -{ - unsigned long rv; - _BitScanForward(&rv,v); - return rv+1; -} -#else -#define findfirstset __builtin_ffs -#endif - -#if 0 -//LDM isn't perf. citrical, and as a result, not implemented fully. -//So this code is disabled -//mask is *2 -template -void DYNACALL DoLDM(u32 addr, u32 mask) -{ - -#if HOST_CPU==CPU_X86 - addr=virt_arm_reg(0); - mask=virt_arm_reg(1); -#endif - //addr=(addr); //force align ? - - u32 idx=-1; - do - { - u32 tz=findfirstset(mask); - mask>>=tz; - idx+=tz; - arm_Reg[idx].I=arm_ReadMem32(addr); - addr+=4; - } while(mask); -} -#endif + reg[15].I = armNextPC + 8; +#include "arm-new.h" -void* GetMemOp(bool L, bool B) -{ - if (L) - { - if (B) - return (void*)(u32(DYNACALL*)(u32,u32))&DoMemOp; - else - return (void*)(u32(DYNACALL*)(u32,u32))&DoMemOp; - } - else - { - if (B) - return (void*)(u32(DYNACALL*)(u32,u32))&DoMemOp; - else - return (void*)(u32(DYNACALL*)(u32,u32))&DoMemOp; - } + return clockTicks; } -//Decodes an opcode, returns type. -//opcd might be changed (currently for LDM/STM -> LDR/STR transforms) -OpType DecodeOpcode(u32& opcd,u32& flags) -{ - //by default, PC has to be updated - flags=OP_READS_PC; - - u32 CC=(opcd >> 28); - - if (CC!=CC_AL) - flags|=OP_IS_COND; - - //helpers ... - #define CHK_BTS(M,S,V) ( (M & (opcd>>S)) == (V) ) //Check bits value in opcode - #define IS_LOAD (opcd & (1<<20)) //Is L bit set ? (LDM/STM LDR/STR) - #define READ_PC_CHECK(S) if (CHK_BTS(15,S,15)) flags|=OP_READS_PC; - - //Opcode sets pc ? 
- bool _set_pc= - (CHK_BTS(3,26,0) && CHK_BTS(15,12,15)) || //Data processing w/ Rd=PC - (CHK_BTS(3,26,1) && CHK_BTS(15,12,15) && IS_LOAD ) || //LDR/STR w/ Rd=PC - (CHK_BTS(7,25,4) && (opcd & 32768) && IS_LOAD) || //LDM/STM w/ PC in list - CHK_BTS(7,25,5) || //B or BL - CHK_BTS(15,24,15); //SWI - - //NV condition means VFP on newer cores, let interpreter handle it... - if (CC==15) - return VOT_Fallback; - - if (_set_pc) - flags|=OP_SETS_PC; - - //B / BL ? - if (CHK_BTS(7,25,5)) - { - verify(_set_pc); - if (!(flags&OP_IS_COND)) - flags&=~OP_READS_PC; //not COND doesn't read from pc - - flags|=OP_SETS_PC; //Branches Set pc .. - - //branch ! - return (opcd&(1<<24))?VOT_BL:VOT_B; - } - - //Common case: MOVCC PC,REG - if (CHK_BTS(0xFFFFFF,4,0x1A0F00)) - { - verify(_set_pc); - if (CC==CC_AL) - flags&=~OP_READS_PC; - - return VOT_BR; - } - - - //No support for COND branching opcodes apart from the forms above .. - if (CC!=CC_AL && _set_pc) - { - return VOT_Fallback; - } - - u32 RList=opcd&0xFFFF; - u32 Rn=(opcd>>16)&15; - -#define LDM_REGCNT() (cpuBitsSet[RList & 255] + cpuBitsSet[(RList >> 8) & 255]) - - - //Data Processing opcodes -- find using mask/key - //This will eventually be virtualised w/ register renaming - for( u32 i=0;i> 28)!=0xE) - { - flags |= OP_HAS_FLAGS_READ; - //if (flags & OP_WRITE_REG) - flags |= OP_HAS_RD_READ; - } - - //DPOP ! - - if ((ops[i].flags & OP_READ_FLAGS) || - ((ops[i].flags & OP_READ_FLAGS_S) && (opcd & (1<<20)))) - { - flags |= OP_HAS_FLAGS_READ; - } - - if ((ops[i].flags & OP_WRITE_FLAGS) || - ((ops[i].flags & OP_WRITE_FLAGS_S) && (opcd & (1<<20)))) - { - flags |= OP_HAS_FLAGS_WRITE; - } - - if(ops[i].flags & OP_WRITE_REG) - { - //All dpops that write, write to RD_12 - flags |= OP_HAS_RD_12; - verify(! (CHK_BTS(15,12,15) && CC!=CC_AL)); - } - if(ops[i].flags & OP_READ_REG_1) - { - //Reg 1 is RS_16 - flags |= OP_HAS_RS_16; +u32 DYNACALL ARM7Backend::StepMany(Arm7Context* ctx, u32 minCycles) { + u32 clockTicks = 0; - //reads from pc ? 
- READ_PC_CHECK(16); - } - - //op2 is imm or reg ? - if ( !(opcd & (1<<25)) ) - { - //its reg (register or imm shifted) - flags |= OP_HAS_RS_0; - //reads from pc ? - READ_PC_CHECK(0); - - //is it register shifted reg ? - if (opcd & (1<<4)) - { - verify(! (opcd & (1<<7)) ); //must be zero - flags |= OP_HAS_RS_8; - //can't be pc ... - verify(!CHK_BTS(15,8,15)); - } - else - { - //is it RRX ? - if ( ((opcd>>4)&7)==6) - { - //RRX needs flags to be read (even if the opcode doesn't) - flags |= OP_HAS_FLAGS_READ; - } - } - } - - return VOT_DataOp; - } - } - - //Lets try mem opcodes since its not data processing - - - - /* - Lets Check LDR/STR ! - - CCCC 01 0 P UBW L Rn Rd Offset -- LDR/STR (I=0) - */ - if ((opcd>>25)==(0xE4/2) ) - { - /* - I=0 - - Everything else handled - */ - arm_printf("ARM: MEM %08X L/S:%d, AWB:%d!\n",opcd,(opcd>>20)&1,(opcd>>21)&1); - - return VOT_Read; - } - else if ((opcd>>25)==(0xE6/2) && CHK_BTS(0x7,4,0) ) - { - arm_printf("ARM: MEM REG to Reg %08X\n",opcd); - - /* - I=1 - - Logical Left shift, only - */ - return VOT_Read; - } - //LDM common case - else if ((opcd>>25)==(0xE8/2) /*&& CHK_BTS(32768,0,0)*/ && CHK_BTS(1,22,0) && CHK_BTS(1,20,1) && LDM_REGCNT()==1) + while (clockTicks < minCycles) { - //P=0 - //U=1 - //L=1 - //W=1 - //S=0 - - u32 old_opcd=opcd; - - //One register xfered - //Can be rewriten as normal mem opcode .. 
- opcd=0xE4000000; - - //Imm offset - opcd |= 0<<25; - //Post incr - opcd |= old_opcd & (1<<24); - //Up/Dn - opcd |= old_opcd & (1<<23); - //Word/Byte - opcd |= 0<<22; - //Write back (must be 0 for PI) - opcd |= old_opcd & (1<<21); - //Load - opcd |= old_opcd & (1<<20); - - //Rn - opcd |= Rn<<16; - - //Rd - u32 Rd=findfirstset(RList)-1; - opcd |= Rd<<12; - - //Offset - opcd |= 4; - - arm_printf("ARM: MEM TFX R %08X\n",opcd); - - return VOT_Read; + clockTicks += Step(ctx); } - //STM common case - else if ((opcd>>25)==(0xE8/2) && CHK_BTS(1,22,0) && CHK_BTS(1,20,0) && LDM_REGCNT()==1) - { - //P=1 - //U=0 - //L=1 - //W=1 - //S=0 - - u32 old_opcd=opcd; - - //One register xfered - //Can be rewriten as normal mem opcode .. - opcd=0xE4000000; - - //Imm offset - opcd |= 0<<25; - //Pre/Post incr - opcd |= old_opcd & (1<<24); - //Up/Dn - opcd |= old_opcd & (1<<23); - //Word/Byte - opcd |= 0<<22; - //Write back - opcd |= old_opcd & (1<<21); - //Store/Load - opcd |= old_opcd & (1<<20); - - //Rn - opcd |= Rn<<16; - //Rd - u32 Rd=findfirstset(RList)-1; - opcd |= Rd<<12; - - //Offset - opcd |= 4; - - arm_printf("ARM: MEM TFX W %08X\n",opcd); - - return VOT_Read; - } - else if (CHK_BTS(0xE10F0FFF,0,0xE10F0000)) - { - return VOT_MRS; - } - else if (CHK_BTS(0xEFBFFFF0,0,0xE129F000)) - { - return VOT_MSR; - } - else if ((opcd>>25)==(0xE8/2) && CHK_BTS(32768,0,0)) - { - arm_printf("ARM: MEM FB %08X\n",opcd); - flags|=OP_MFB; //(flag Just for the fallback counters) - } - else - { - arm_printf("ARM: FB %08X\n",opcd); - } - - //by default fallback to interpr - return VOT_Fallback; -} - -//helpers ... 
-#if HOST_CPU == CPU_ARM64 -extern void LoadReg(eReg rd,u32 regn,ConditionCode cc=CC_AL); -extern void StoreReg(eReg rd,u32 regn,ConditionCode cc=CC_AL); -extern void armv_mov(ARM::eReg regd, ARM::eReg regn); -extern void armv_add(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm); -extern void armv_sub(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm); -extern void armv_add(ARM::eReg regd, ARM::eReg regn, s32 imm); -extern void armv_lsl(ARM::eReg regd, ARM::eReg regn, u32 imm); -extern void armv_bic(ARM::eReg regd, ARM::eReg regn, u32 imm); -extern void *armv_start_conditional(ARM::ConditionCode cc); -extern void armv_end_conditional(void *ref); -// Use w25 for temp mem save because w9 is not callee-saved -#define r9 ((ARM::eReg)25) -#else -void LoadReg(eReg rd,u32 regn,ConditionCode cc=CC_AL) -{ - LDR(rd,r8,(u8*)®[regn].I-(u8*)®[0].I,Offset,cc); -} -void StoreReg(eReg rd,u32 regn,ConditionCode cc=CC_AL) -{ - STR(rd,r8,(u8*)®[regn].I-(u8*)®[0].I,Offset,cc); -} -void armv_mov(ARM::eReg regd, ARM::eReg regn) -{ - MOV(regd, regn); -} - -void armv_add(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) -{ - ADD(regd, regn, regm); -} - -void armv_sub(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) -{ - SUB(regd, regn, regm); -} - -void armv_add(ARM::eReg regd, ARM::eReg regn, s32 imm) -{ - if (imm >= 0) - ADD(regd, regn, imm); - else - SUB(regd, regn, -imm); -} - -void armv_lsl(ARM::eReg regd, ARM::eReg regn, u32 imm) -{ - LSL(regd, regn, imm); -} - -void armv_bic(ARM::eReg regd, ARM::eReg regn, u32 imm) -{ - BIC(regd, regn, imm); -} - -void *armv_start_conditional(ARM::ConditionCode cc) -{ - return NULL; -} -void armv_end_conditional(void *ref) -{ -} -#endif - -//very quick-and-dirty register rename based virtualisation -u32 renamed_regs[16]; -u32 rename_reg_base; - -void RenameRegReset() -{ - rename_reg_base=r1; - memset(renamed_regs, 0, sizeof(renamed_regs)); -} - -//returns new reg #. 
didrn is true if a rename mapping was added -u32 RenameReg(u32 reg, bool& didrn) -{ - if (renamed_regs[reg] == 0) - { - renamed_regs[reg]=rename_reg_base; - rename_reg_base++; - didrn=true; - } - else - { - didrn=false; - } - - return renamed_regs[reg]; -} - -//For reg reads (they need to be loaded) -//load can be used to skip loading (for RD if not cond) -void LoadAndRename(u32& opcd, u32 bitpos, bool load,u32 pc) -{ - bool didrn; - u32 reg=(opcd>>bitpos)&15; - - u32 nreg=RenameReg(reg,didrn); - - opcd = (opcd& ~(15<>bitpos)&15; - - u32 nreg=RenameReg(reg,didrn); - - verify(!didrn); - - if (reg==15) - reg=R15_ARM_NEXT; - - StoreReg((eReg)nreg,reg); -} - -#if HOST_CPU == CPU_ARM64 -extern void LoadFlags(); -extern void StoreFlags(); -#else -//For COND -void LoadFlags() -{ - //Load flags - LoadReg(r0,RN_PSR_FLAGS); - //move them to flags register - MSR(0,8,r0); -} - -void StoreFlags() -{ - //get results from flags register - MRS(r1,0); - //Store flags - StoreReg(r1,RN_PSR_FLAGS); -} -#endif - -//Virtualise Data Processing opcode -void VirtualizeOpcode(u32 opcd,u32 flag,u32 pc) -{ - //Keep original opcode for info - u32 orig=opcd; - - //Load arm flags, RS0/8/16, RD12/16 (as indicated by the decoder flags) - - if (flag & OP_HAS_FLAGS_READ) - { - LoadFlags(); - } - - if (flag & OP_HAS_RS_0) - LoadAndRename(opcd,0,true,pc+8); - if (flag & OP_HAS_RS_8) - LoadAndRename(opcd,8,true,pc+8); - if (flag & OP_HAS_RS_16) - LoadAndRename(opcd,16,true,pc+8); - - if (flag & OP_HAS_RD_12) - LoadAndRename(opcd,12,flag&OP_HAS_RD_READ,pc+4); - - if (flag & OP_HAS_RD_16) - { - verify(! (flag & OP_HAS_RS_16)); - LoadAndRename(opcd,16,flag&OP_HAS_RD_READ,pc+4); - } - - //Opcode has been modified to use the new regs - //Emit it ... 
- arm_printf("Arm Virtual: %08X -> %08X\n",orig,opcd); - armEmit32(opcd); - - //Store arm flags, rd12/rd16 (as indicated by the decoder flags) - if (flag & OP_HAS_RD_12) - StoreAndRename(orig,12); - - if (flag & OP_HAS_RD_16) - StoreAndRename(orig,16); - - //Sanity check .. - if (renamed_regs[15] != 0) - { - verify(flag&OP_READS_PC || (flag&OP_SETS_PC && !(flag&OP_IS_COND))); - } - - if (flag & OP_HAS_FLAGS_WRITE) - StoreFlags(); -} - -u32 nfb,ffb,bfb,mfb; - -void *armGetEmitPtr() -{ - if (icPtr < (ICache+ICacheSize-1024)) //ifdebug - return static_cast(icPtr); - - return NULL; -} - -#if HOST_CPU == CPU_X86 && FEAT_AREC != DYNAREC_NONE - -/* X86 backend - * Uses a mix of - * x86 code - * Virtualised arm code (using the varm interpreter) - * Emulated arm fallbacks (using the aica arm interpreter) - * - * The goal is to run as much code possible under the varm interpreter - * so it will run on arm w/o changes. A few opcodes are missing from varm - * (MOV32 is a notable case) and as such i've added a few varm_* hooks - * - * This code also performs a LOT of compiletime and runtime state/value sanity checks. - * We don't care for speed here ... 
-*/ - -#include "emitter/x86_emitter.h" - -static x86_block* x86e; - -void DumpRegs(const char* output) -{ - static FILE* f=fopen(output, "w"); - static int id=0; -#if 0 - if (490710==id) - { - __asm int 3; - } -#endif - verify(id!=137250); -#if 1 - fprintf(f,"%d\n",id); - //for(int i=0;i<14;i++) - { - int i=R15_ARM_NEXT; - fprintf(f,"r%d=%08X\n",i,reg[i].I); - } -#endif - id++; -} - -void DYNACALL PrintOp(u32 opcd) -{ - printf("%08X\n",opcd); -} - -void armv_imm_to_reg(u32 regn, u32 imm) -{ - x86e->Emit(op_mov32,®[regn].I,imm); -} - -void armv_MOV32(eReg regn, u32 imm) -{ - x86e->Emit(op_mov32,&virt_arm_reg(regn),imm); -} - -void armv_call(void* loc) -{ - x86e->Emit(op_call,x86_ptr_imm(loc)); -} - -x86_Label* end_lbl; - -void armv_setup() -{ - //Setup emitter - x86e = new x86_block(); - x86e->Init(0,0); - x86e->x86_buff=(u8*)EMIT_GET_PTR(); - x86e->x86_size=1024*64; - x86e->do_realloc=false; - - - //load base reg .. - x86e->Emit(op_mov32,&virt_arm_reg(8),(u32)&arm_Reg[0]); - - //the "end" label is used to exit from the block, if a code modification (expected opcode // actual opcode in ram) is detected - end_lbl=x86e->CreateLabel(false,0); -} - -void armv_intpr(u32 opcd) -{ - //Call interpreter - x86e->Emit(op_mov32,ECX,opcd); - x86e->Emit(op_call,x86_ptr_imm(&arm_single_op)); -} - -void armv_end(void* codestart, u32 cycles) -{ - //Normal block end - //Move counter to EAX for return, pop ESI, ret - x86e->Emit(op_sub32,ESI,cycles); - x86e->Emit(op_jns,x86_ptr_imm(arm_dispatch)); - x86e->Emit(op_jmp,x86_ptr_imm(arm_exit)); - - //Fluch cache, move counter to EAX, pop, ret - //this should never happen (triggers a breakpoint on x86) - x86e->MarkLabel(end_lbl); - x86e->Emit(op_int3); - x86e->Emit(op_call,x86_ptr_imm(FlushCache)); - x86e->Emit(op_sub32,ESI,cycles); - x86e->Emit(op_jmp,x86_ptr_imm(arm_dispatch)); - - //Generate the code & apply fixups/relocations as needed - x86e->Generate(); - - //Use space from the dynarec buffer - icPtr+=x86e->x86_indx; - - //Delete the 
x86 emitter ... - delete x86e; -} - -//sanity check: non branch doesn't set pc -void armv_check_pc(u32 pc) -{ - x86e->Emit(op_cmp32,&armNextPC,pc); - x86_Label* nof=x86e->CreateLabel(false,0); - x86e->Emit(op_je,nof); - x86e->Emit(op_int3); - x86e->MarkLabel(nof); -} - -//sanity check: stale cache -void armv_check_cache(u32 opcd, u32 pc) -{ - x86e->Emit(op_cmp32,&CPUReadMemoryQuick(pc),opcd); - x86_Label* nof=x86e->CreateLabel(false,0); - x86e->Emit(op_je,nof); - x86e->Emit(op_int3); - x86e->MarkLabel(nof); -} - -//profiler hook -void armv_prof(OpType opt,u32 op,u32 flags) -{ - if (VOT_Fallback!=opt) - x86e->Emit(op_add32,&nfb,1); - else - { - if (flags & OP_SETS_PC) - x86e->Emit(op_add32,&bfb,1); - else if (flags & OP_MFB) - x86e->Emit(op_add32,&mfb,1); - else - x86e->Emit(op_add32,&ffb,1); - } -} - -naked void DYNACALL arm_compilecode() -{ -#if HOST_OS == OS_LINUX - __asm ( "call CompileCode \n\t" - "mov $0, %%eax \n\t" - "jmp arm_dispatch \n" - : - ); -#else - __asm - { - call CompileCode; - mov eax,0; - jmp arm_dispatch; - } -#endif -} - -naked void DYNACALL arm_mainloop(u32 cycl, void* regs, void* entrypoints) -{ -#if HOST_OS == OS_LINUX - __asm ( "push %%esi \n\t" - "mov %%ecx, %%esi \n\t" - "add %0, %%esi \n\t" - "mov $0, %%eax \n\t" - "jmp arm_dispatch \n\t" - - "arm_exit_linux: \n\t" - "mov %%esi, %0 \n\t" - "pop %%esi \n" - : - : "m" (reg[CYCL_CNT].I) - ); -#else - __asm - { - push esi - - mov esi,ecx - add esi,reg[CYCL_CNT*4].I - - mov eax,0; - jmp arm_dispatch - } -#endif -} - -naked void arm_dispatch() -{ -#if HOST_OS == OS_LINUX - __asm ( "arm_dispatch: \n\t" - "mov %0, %%eax \n\t" - "and $0x7FFFFC, %%eax \n\t" - "cmp $0, %1 \n\t" - "jne arm_dofiq \n\t" - "jmp *%2(%%eax) \n" - : - : "m" (reg[R15_ARM_NEXT].I), - "m" (reg[INTR_PEND].I), - "m" (EntryPoints) - ); - - __asm ("arm_dofiq: \n\t" - "call CPUFiq \n\t" - "jmp arm_dispatch \n" - : - ); -#else - __asm - { -arm_disp: - mov eax,reg[R15_ARM_NEXT*4].I - and eax,0x7FFFFC - cmp reg[INTR_PEND*4].I,0 - 
jne arm_dofiq - jmp [EntryPoints+eax] - -arm_dofiq: - call CPUFiq - jmp arm_disp - } -#endif -} - -naked void arm_exit() -{ -#if HOST_OS == OS_LINUX - __asm ( "jmp arm_exit_linux" :); -#else - __asm - { - arm_exit: - mov reg[CYCL_CNT*4].I,esi - pop esi - ret - } -#endif -} -#elif (HOST_CPU == CPU_ARM) - -/* - * - * ARMv7 Compiler - * - */ - -//mprotect and stuff .. - -#include - -void armEmit32(u32 emit32) -{ - if (icPtr >= (ICache+ICacheSize-1024)) - die("ICache is full, invalidate old entries ..."); //ifdebug - - *(u32*)icPtr = emit32; - icPtr+=4; -} - -#if HOST_OS==OS_DARWIN -#include -extern "C" void armFlushICache(void *code, void *pEnd) { - sys_dcache_flush(code, (u8*)pEnd - (u8*)code + 1); - sys_icache_invalidate(code, (u8*)pEnd - (u8*)code + 1); -} -#else -extern "C" void armFlushICache(void *bgn, void *end) { - __builtin___clear_cache((char*)bgn, (char*)end); -} -#endif - - -void armv_imm_to_reg(u32 regn, u32 imm) -{ - MOV32(r0,imm); - StoreReg(r0,regn); -} - -void armv_call(void* loc) -{ - CALL((u32)loc); -} - -void armv_setup() -{ - //Setup emitter - - //r9: temp for mem ops (PI WB) - //r8: base - //Stored on arm_mainloop so no need for push/pop -} - -void armv_intpr(u32 opcd) -{ - //Call interpreter - MOV32(r0,opcd); - CALL((u32)arm_single_op); -} - -void armv_end(void* codestart, u32 cycl) -{ - //Normal block end - //cycle counter rv - - //pop registers & return - if (is_i8r4(cycl)) - SUB(r5,r5,cycl,true); - else - { - u32 togo = cycl; - while(ARMImmid8r4_enc(togo) == -1) - { - SUB(r5,r5,256); - togo -= 256; - } - SUB(r5,r5,togo,true); - } - JUMP((u32)&arm_exit,CC_MI); //statically predicted as not taken - JUMP((u32)&arm_dispatch); - - armFlushICache(codestart,(void*)EMIT_GET_PTR()); -} - -//Hook cus varm misses this, so x86 needs special code -void armv_MOV32(eReg regn, u32 imm) -{ - MOV32(regn,imm); -} - -/* - No sanity checks on arm .. 
-*/ - -#endif // HOST_CPU == CPU_ARM - -//Run a timeslice for ARMREC -//CycleCount is pretty much fixed to (512*32) for now (might change to a diff constant, but will be constant) -void arm_Run(u32 CycleCount) -{ - if (!Arm7Enabled) - return; - - for (int i=0;i<32;i++) - { - arm_mainloop(CycleCount/32, arm_Reg, EntryPoints); - libAICA_TimeStep(); - } - - /* - s32 clktks=reg[CYCL_CNT].I+CycleCount; - - //While we have time to spend - do - { - //Check for interrupts - if (reg[INTR_PEND].I) - { - CPUFiq(); - } - - //lookup code at armNextPC, run a block & remove its cycles from the timeslice - clktks-=EntryPoints[(armNextPC & ARAM_MASK)/4](); - - #if HOST_CPU==CPU_X86 - verify(armNextPC<=ARAM_MASK); - #endif - } while(clktks>0); - - reg[CYCL_CNT].I=clktks; - */ -} - - -#undef r - -/* - TODO: - R15 read/writing is kind of .. weird - Gotta investigate why .. -*/ - -//Mem operand 2 calculation, if Reg or large imm -void MemOperand2(eReg dst,bool I, bool U,u32 offs, u32 opcd) -{ - if (I==true) - { - u32 Rm=(opcd>>0)&15; - verify(CHK_BTS(7,4,0));// only SHL mode - LoadReg(r1,Rm); - u32 SA=31&(opcd>>7); - //can't do shifted add for now -- EMITTER LIMIT -- - if (SA) - armv_lsl(r1, r1, SA); - } - else - { - armv_MOV32(r1,offs); - } - - if (U) - armv_add(dst, r0, r1); - else - armv_sub(dst, r0, r1); -} - -template -void DYNACALL MSR_do(u32 v) -{ -#if HOST_CPU==CPU_X86 - v=virt_arm_reg(r0); -#endif - if (Pd) - { - if(armMode > 0x10 && armMode < 0x1f) /* !=0x10 ?*/ - { - reg[17].I = (reg[17].I & 0x00FFFF00) | (v & 0xFF0000FF); - } - } - else - { - CPUUpdateCPSR(); - - u32 newValue = reg[16].I; - if(armMode > 0x10) - { - newValue = (newValue & 0xFFFFFF00) | (v & 0x000000FF); - } - - newValue = (newValue & 0x00FFFFFF) | (v & 0xFF000000); - newValue |= 0x10; - if(armMode > 0x10) - { - CPUSwitchMode(newValue & 0x1f, false); - } - reg[16].I = newValue; - CPUUpdateFlags(); - } -} - -//Compile & run block of code, starting armNextPC -extern "C" void CompileCode() -{ - //Get the code 
ptr - void* rv=EMIT_GET_PTR(); - - //update the block table - EntryPoints[(armNextPC&ARAM_MASK)/4]=rv; - - //setup local pc counter - u32 pc=armNextPC; - - //emitter/block setup - armv_setup(); - - //the ops counter is used to terminate the block (max op count for a single block is 32 currently) - //We don't want too long blocks for timing accuracy - u32 ops=0; - - u32 Cycles=0; - - for(;;) - { - ops++; - - //Read opcode ... - u32 opcd=CPUReadMemoryQuick(pc); - -#if HOST_CPU==CPU_X86 - //Sanity check: Stale cache - armv_check_cache(opcd,pc); -#endif - - u32 op_flags; - - //Decode & handle opcode - - OpType opt=DecodeOpcode(opcd,op_flags); - - switch(opt) - { - case VOT_DataOp: - { - //data processing opcode that can be virtualised - RenameRegReset(); - - /* - if (op_flags & OP_READS_PC) - armv_imm_to_reg(15,pc+8); - - else*/ -#if HOST_CPU==CPU_X86 - armv_imm_to_reg(15,rand()); -#endif - - VirtualizeOpcode(opcd,op_flags,pc); - -#if HOST_CPU==CPU_X86 - armv_imm_to_reg(15,rand()); -#endif - } - break; - - case VOT_BR: - { - //Branch to reg - ConditionCode cc=(ConditionCode)(opcd>>28); - - verify(op_flags&OP_SETS_PC); - - if (cc!=CC_AL) - { - LoadFlags(); - armv_imm_to_reg(R15_ARM_NEXT,pc+4); - } - - LoadReg(r0,opcd&0xF); -#if HOST_CPU==CPU_X86 - x86e->Emit(op_and32, &virt_arm_reg(0), 0xfffffffc); -#else - armv_bic(r0, r0, 3); -#endif - void *ref = armv_start_conditional(cc); - StoreReg(r0,R15_ARM_NEXT,cc); - armv_end_conditional(ref); - } - break; - - case VOT_B: - case VOT_BL: - { - //Branch to imm - - //<<2, sign extend ! 
- s32 offs=((s32)opcd<<8)>>6; - - if (op_flags & OP_IS_COND) - { - armv_imm_to_reg(R15_ARM_NEXT,pc+4); - LoadFlags(); - ConditionCode cc=(ConditionCode)(opcd>>28); - void *ref = armv_start_conditional(cc); - if (opt==VOT_BL) - { - armv_MOV32(r0,pc+4); - StoreReg(r0,14,cc); - } - - armv_MOV32(r0,pc+8+offs); - StoreReg(r0,R15_ARM_NEXT,cc); - armv_end_conditional(ref); - } - else - { - if (opt==VOT_BL) - armv_imm_to_reg(14,pc+4); - - armv_imm_to_reg(R15_ARM_NEXT,pc+8+offs); - } - } - break; - - case VOT_Read: - { - //LDR/STR - - u32 offs=opcd&4095; - bool U=opcd&(1<<23); - bool Pre=opcd&(1<<24); - - bool W=opcd&(1<<21); - bool I=opcd&(1<<25); - - u32 Rn=(opcd>>16)&15; - u32 Rd=(opcd>>12)&15; - - bool DoWB=W || (!Pre && Rn!=Rd); //Write back if: W, Post update w/ Rn!=Rd - bool DoAdd=DoWB || Pre; - - //Register not updated anyway - if (I==false && offs==0) - { - DoWB=false; - DoAdd=false; - } - - //verify(Rd!=15); - verify(!((Rn==15) && DoWB)); - - //AGU - if (Rn!=15) - { - LoadReg(r0,Rn); - - if (DoAdd) - { - eReg dst=Pre?r0:r9; - - if (I==false && is_i8r4(offs)) - { - if (U) - armv_add(dst, r0, offs); - else - armv_add(dst, r0, -offs); - } - else - { - MemOperand2(dst,I,U,offs,opcd); - } - - if (DoWB && dst==r0) - armv_mov(r9, r0); - } - } - else - { - u32 addr=pc+8; - - if (Pre && offs && I==false) - { - addr+=U?offs:-offs; - } - - armv_MOV32(r0,addr); - - if (Pre && I==true) - { - MemOperand2(r1,I,U,offs,opcd); - armv_add(r0, r0, r1); - } - } - - if (CHK_BTS(1,20,0)) - { - if (Rd==15) - { - armv_MOV32(r1,pc+12); - } - else - { - LoadReg(r1,Rd); - } - } - //Call handler - armv_call(GetMemOp(CHK_BTS(1,20,1),CHK_BTS(1,22,1))); - - if (CHK_BTS(1,20,1)) - { - if (Rd==15) - { - verify(op_flags & OP_SETS_PC); - StoreReg(r0,R15_ARM_NEXT); - } - else - { - StoreReg(r0,Rd); - } - } - - //Write back from AGU, if any - if (DoWB) - { - StoreReg(r9,Rn); - } - } - break; - - case VOT_MRS: - { - u32 Rd=(opcd>>12)&15; - - armv_call((void*)&CPUUpdateCPSR); - - if (opcd & (1<<22)) - { 
- LoadReg(r0,17); - } - else - { - LoadReg(r0,16); - } - - StoreReg(r0,Rd); - } - break; - - case VOT_MSR: - { - u32 Rm=(opcd>>0)&15; - - LoadReg(r0,Rm); - if (opcd & (1<<22)) - armv_call((void*)(void (DYNACALL*)(u32))&MSR_do<1>); - else - armv_call((void*)(void (DYNACALL*)(u32))&MSR_do<0>); - - if (op_flags & OP_SETS_PC) - armv_imm_to_reg(R15_ARM_NEXT,pc+4); - } - break; - /* - //LDM is disabled for now - //Common cases of LDM/STM are converted to STR/LDR (tsz==1) - //Other cases are very uncommon and not worth implementing - case VOT_LDM: - { - //P=0, U=1, S=0, L=1, W=1 - - u32 Rn=(opcd>>16)&15; - u32 RList=opcd&0xFFFF; - u32 tsz=(cpuBitsSet[RList & 255] + cpuBitsSet[(RList >> 8) & 255]); - - verify(CHK_BTS(1,24,0)); //P=0 - verify(CHK_BTS(1,23,1)); //U=1 - verify(CHK_BTS(1,22,0)); //S=0 - verify(CHK_BTS(1,21,1)); //W=1 - verify(CHK_BTS(1,20,1)); //L=0 - - - //if (tsz!=1) - // goto FALLBACK; - - bool _W=true; //w=1 - - - if (RList & (1<); - - if (_W) - { - StoreReg(r9,Rn); - } - } - break; - */ - - case VOT_Fallback: - { - //interpreter fallback - - //arm_single_op needs PC+4 on r15 - //TODO: only write it if needed -> Probably not worth the code, very few fallbacks now... - armv_imm_to_reg(15,pc+8); - - //For cond branch, MSR - if (op_flags & OP_SETS_PC) - armv_imm_to_reg(R15_ARM_NEXT,pc+4); - -#if HOST_CPU==CPU_X86 - if ( !(op_flags & OP_SETS_PC) ) - armv_imm_to_reg(R15_ARM_NEXT,pc+4); -#endif - - armv_intpr(opcd); - -#if HOST_CPU==CPU_X86 - if ( !(op_flags & OP_SETS_PC) ) - { - //Sanity check: next pc - armv_check_pc(pc+4); -#if 0 - x86e->Emit(op_mov32,ECX,opcd); - x86e->Emit(op_call,x86_ptr_imm(PrintOp)); -#endif - } -#endif - } - break; - - default: - die("can't happen\n"); - } - - //Lets say each opcode takes 9 cycles for now .. - Cycles+=9; - -#if HOST_CPU==CPU_X86 - armv_imm_to_reg(15,0xF87641FF); - - armv_prof(opt,opcd,op_flags); -#endif - - //Branch ? 
- if (op_flags & OP_SETS_PC) - { - //x86e->Emit(op_call,x86_ptr_imm(DumpRegs)); // great debugging tool - arm_printf("ARM: %06X: Block End %d\n",pc,ops); - -#if HOST_CPU==CPU_X86 && 0 - //Great fallback finder, also spams console - if (opt==VOT_Fallback) - { - x86e->Emit(op_mov32,ECX,opcd); - x86e->Emit(op_call,x86_ptr_imm(PrintOp)); - } -#endif - break; - } - - //block size limit ? - if (ops>32) - { - arm_printf("ARM: %06X: Block split %d\n",pc,ops); - - armv_imm_to_reg(R15_ARM_NEXT,pc+4); - break; - } - - //Goto next opcode - pc+=4; - } - - armv_end((void*)rv,Cycles); -} - - - -void FlushCache() -{ - icPtr=ICache; - for (u32 i = 0; i < ARRAY_SIZE(EntryPoints); i++) - EntryPoints[i] = (void*)&arm_compilecode; -} - - - -#if HOST_CPU == CPU_X86 -#if HOST_OS == OS_WINDOWS -#include -#endif - -// These have to be declared somewhere or linker dies -u8* ARM::emit_opt=0; -eReg ARM::reg_addr; -eReg ARM::reg_dst; -s32 ARM::imma; - -void armEmit32(u32 emit32) -{ - if (icPtr >= (ICache + ICacheSize - 64*1024)) { - die("ICache is full, invalidate old entries ..."); //ifdebug - } - - x86e->Emit(op_mov32,ECX,emit32); - x86e->Emit(op_call,x86_ptr_imm(virt_arm_op)); -} - -#endif // X86 - - -void armt_init() -{ - InitHash(); - - //align to next page .. 
- ICache = (u8*)(((unat)ARM7_TCB+4095)& ~4095); - - #if HOST_OS==OS_DARWIN - //Can't just mprotect on iOS - munmap(ICache, ICacheSize); - ICache = (u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0); - #endif - -#if HOST_OS == OS_WINDOWS - DWORD old; - VirtualProtect(ICache,ICacheSize,PAGE_EXECUTE_READWRITE,&old); -#elif HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN - - printf("\n\t ARM7_TCB addr: %p | from: %p | addr here: %p\n", ICache, ARM7_TCB, armt_init); - - if (mprotect(ICache, ICacheSize, PROT_EXEC|PROT_READ|PROT_WRITE)) - { - perror("\n\tError - Couldn’t mprotect ARM7_TCB!"); - verify(false); - } - -#if TARGET_IPHONE - memset((u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0),0xFF,ICacheSize); -#else - memset(ICache,0xFF,ICacheSize); -#endif - -#endif - - icPtr=ICache; -} - - -#endif + return clockTicks; +} \ No newline at end of file diff --git a/libswirl/hw/arm7/arm7.h b/libswirl/hw/arm7/arm7.h index 216f9ec648..8ba7a7b4ba 100644 --- a/libswirl/hw/arm7/arm7.h +++ b/libswirl/hw/arm7/arm7.h @@ -1,10 +1,33 @@ #pragma once #include "types.h" -void arm_Init(); -void arm_Reset(); -void arm_Run(u32 uNumCycles); -void arm_SetEnabled(bool enabled); +struct Arm7Context; + +struct ARM7Backend { + + static u32 DYNACALL singleOp(Arm7Context* ctx, u32 opcode); + static u32 DYNACALL Step(Arm7Context* ctx); + static u32 DYNACALL StepMany(Arm7Context* ctx, u32 minCycles); + + static void DYNACALL CPUSwitchMode(Arm7Context* ctx, int mode, bool saveState); + static void DYNACALL CPUUpdateFlags(Arm7Context* ctx); + static void DYNACALL UpdateInterrupts(Arm7Context* ctx); + static void DYNACALL CPUUpdateCPSR(Arm7Context* ctx); + static void DYNACALL CPUFiq(Arm7Context* ctx); + + virtual void Run(u32 uNumCycles) = 0; + virtual void UpdateInterrupts() = 0; + virtual void InvalidateJitCache() = 0; + virtual void* GetEntrypointBase() = 0; + + virtual ~ARM7Backend() { } 
+ + static ARM7Backend* CreateInterpreter(Arm7Context* ctx); + static ARM7Backend* CreateJit(Arm7Context* ctx); +}; + +void libARM_SetResetState(bool Reset); +void libARM_InterruptChange(u32 bits, u32 L); #define arm_sh4_bias (2) diff --git a/libswirl/hw/arm7/arm7_context.h b/libswirl/hw/arm7/arm7_context.h new file mode 100644 index 0000000000..805e721c01 --- /dev/null +++ b/libswirl/hw/arm7/arm7_context.h @@ -0,0 +1,118 @@ +#pragma once +#include "types.h" + +enum +{ + RN_CPSR = 16, + RN_SPSR = 17, + + R13_IRQ = 18, + R14_IRQ = 19, + SPSR_IRQ = 20, + R13_USR = 26, + R14_USR = 27, + R13_SVC = 28, + R14_SVC = 29, + SPSR_SVC = 30, + R13_ABT = 31, + R14_ABT = 32, + SPSR_ABT = 33, + R13_UND = 34, + R14_UND = 35, + SPSR_UND = 36, + R8_FIQ = 37, + R9_FIQ = 38, + R10_FIQ = 39, + R11_FIQ = 40, + R12_FIQ = 41, + R13_FIQ = 42, + R14_FIQ = 43, + SPSR_FIQ = 44, + RN_PSR_FLAGS = 45, + R15_ARM_NEXT = 46, + INTR_PEND = 47, + CYCL_CNT = 48, + + RN_ARM_REG_COUNT, +}; + +typedef union +{ + struct + { + u8 B0; + u8 B1; + u8 B2; + u8 B3; + } B; + + struct + { + u16 W0; + u16 W1; + } W; + + union + { + struct + { + u32 _pad0 : 28; + u32 V : 1; //Bit 28 + u32 C : 1; //Bit 29 + u32 Z : 1; //Bit 30 + u32 N : 1; //Bit 31 + }; + + struct + { + u32 _pad1 : 28; + u32 NZCV : 4; //Bits [31:28] + }; + } FLG; + + struct + { + u32 M : 5; //mode, PSR[4:0] + u32 _pad0 : 1; //not used / zero + u32 F : 1; //FIQ disable, PSR[6] + u32 I : 1; //IRQ disable, PSR[7] + u32 _pad1 : 20; //not used / zero + u32 NZCV : 4; //Bits [31:28] + } PSR; + + u32 I; +} arm7_reg; + +struct ARM7Backend; +struct AICA; + +struct Arm7Context +{ + DECL_ALIGN(8) arm7_reg regs[RN_ARM_REG_COUNT]; + + u8* aica_ram; + u32 aram_mask; + + bool armIrqEnable; + bool armFiqEnable; + //bool armState; + int armMode; + + //Set to true when aica interrupt is pending + bool aica_interr = false; + u32 aica_reg_L = 0; + //Set to true when the out of the intc is 1 + bool e68k_out = false; + u32 e68k_reg_L; + u32 e68k_reg_M = 0; //constant ? 
+ bool enabled; + + u8 (DYNACALL*read8)(u32 addr, Arm7Context* ctx); + u32 (DYNACALL*read32)(u32 addr, Arm7Context* ctx); + + void (DYNACALL*write8)(u32 addr, u8 data, Arm7Context* ctx); + void (DYNACALL*write32)(u32 addr, u32 data, Arm7Context* ctx); + + ARM7Backend* backend; + AICA* aica; +}; \ No newline at end of file diff --git a/libswirl/hw/arm7/arm7_interpreter.cpp b/libswirl/hw/arm7/arm7_interpreter.cpp new file mode 100644 index 0000000000..63730dd462 --- /dev/null +++ b/libswirl/hw/arm7/arm7_interpreter.cpp @@ -0,0 +1,41 @@ +#include "arm7.h" +#include "arm_mem.h" +#include "arm7_context.h" + +/* +void CPUSwitchMode(int mode, bool saveState, bool breakLoop=true); +extern "C" void CPUFiq(); +void CPUUpdateCPSR(); +void CPUUpdateFlags(); +void CPUSoftwareInterrupt(int comment); +void CPUUndefinedException(); +*/ + +struct Arm7Interpreter_impl : ARM7Backend { + + Arm7Context* ctx; + + Arm7Interpreter_impl(Arm7Context* ctx) : ctx(ctx) { } + + void UpdateInterrupts() + { + ARM7Backend::UpdateInterrupts(ctx); + } + + void Run(u32 CycleCount) + { + StepMany(ctx, CycleCount); + } + + void InvalidateJitCache() { + + } + + void* GetEntrypointBase() { + return nullptr; + } +}; + +ARM7Backend* ARM7Backend::CreateInterpreter(Arm7Context* ctx) { + return new Arm7Interpreter_impl(ctx); +} \ No newline at end of file diff --git a/libswirl/hw/arm7/arm7_jit_virt.cpp b/libswirl/hw/arm7/arm7_jit_virt.cpp new file mode 100644 index 0000000000..1a059e2b89 --- /dev/null +++ b/libswirl/hw/arm7/arm7_jit_virt.cpp @@ -0,0 +1,1179 @@ +#include "arm7.h" +#include "arm_mem.h" +#include "virt_arm.h" +#include "arm7_context.h" +#include "hw/aica/aica_mmio.h" +#include + +#include "arm7_jit_virt_backend.h" + +#include "jit/emitter/arm32/arm_coding.h" + + +using namespace ARM; + +#define arm_printf(...) 
+ +#define arm_reg ctx->regs +#define armMode ctx->armMode + +#define armNextPC arm_reg[R15_ARM_NEXT].I +#define CPUReadMemoryQuick(addr) (*(u32*)&ctx->aica_ram[addr&ctx->aram_mask]) + +static const u8 cpuBitsSet[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; + +//findfirstset -- used in LDM/STM handling +#if HOST_CPU==CPU_X86 && BUILD_COMPILER != COMPILER_GCC +#include + +u32 findfirstset(u32 v) +{ + unsigned long rv; + _BitScanForward(&rv, v); + return rv + 1; +} +#else +#define findfirstset __builtin_ffs +#endif + + +#if FEAT_AREC != DYNAREC_NONE +Arm7VirtBackend* virtBackend; + +/* + * + * X86 Compiler + * + */ + +void* EntryPoints[ARAM_SIZE_MAX / 4]; + + + +template +void DYNACALL MSR_do(Arm7Context* ctx, u32 v) +{ + if (Pd) + { + if (armMode > 0x10 && armMode < 0x1f) /* !=0x10 ?*/ + { + arm_reg[17].I = (arm_reg[17].I & 0x00FFFF00) | (v & 0xFF0000FF); + } + } + else + { + ARM7Backend::CPUUpdateCPSR(ctx); + + u32 newValue = arm_reg[16].I; + if (armMode > 0x10) + { + newValue = (newValue & 0xFFFFFF00) | (v & 0x000000FF); + } + + newValue = (newValue & 0x00FFFFFF) | (v & 0xFF000000); + newValue |= 0x10; + if (armMode > 0x10) + { + ARM7Backend::CPUSwitchMode(ctx, newValue & 0x1f, false); + } + arm_reg[16].I = newValue; + 
ARM7Backend::CPUUpdateFlags(ctx); + } +} + + +// FIXME: arm7 decoder + + + /* + + ARM + ALU opcodes (more or less) + + (flags,rv)=opcode(flags,in regs ..) + rd=rv; + if (set_flags) + PSR=(rd==pc?CPSR:flags); + + (mem ops) + Writes of R15: + R15+12 + R15 as base: + R15+8 + LDR + rd=mem[addr(in regs)] + LDM + + ... + STR/STM: pc+12 + + + /// + + "cached" interpreter: + Set PC+12 to PC reg + mov opcode + call function + + if (pc settting opcode) + lookup again using armNextPC + + + PC setting opcodes + ALU with write to PC + LDR with write to PC (SDT) + LDM with write to PC (BDT) + B/BL + SWI + + + Indirect, via write to PSR/Mode + MSR + */ + + +struct ArmDPOP +{ + u32 key; + u32 mask; + u32 flags; +}; + +vector ops; + + +#define DP_R_ROFC (OP_READ_FLAGS_S|OP_READ_REG_1) //Reads reg1, op2, flags if S +#define DP_R_ROF (OP_READ_FLAGS|OP_READ_REG_1) //Reads reg1, op2, flags (ADC & co) +#define DP_R_OFC (OP_READ_FLAGS_S) //Reads op2, flags if S + +#define DP_W_RFC (OP_WRITE_FLAGS_S|OP_WRITE_REG) //Writes reg, and flags if S +#define DP_W_F (OP_WRITE_FLAGS) //Writes only flags, always (S=1) + + + + + +void armEmit32(u32 emit32) { + virtBackend->Emit32(emit32); +} +void* armGetEmitPtr() { + return virtBackend->armGetEmitPtr(); +} + +struct Arm7JitVirt_impl : ARM7Backend { + unique_ptr armv; + Arm7Context* ctx; + Looppoints lps; + + Arm7JitVirt_impl(Arm7Context* ctx) : ctx(ctx) { + armv.reset(Arm7VirtBackend::Create(this, ctx)); + + armv->GenerateLooppoints(&lps); + + armt_init(); + } + + void UpdateInterrupts() + { + ARM7Backend::UpdateInterrupts(ctx); + } + + void Run(u32 CycleCount) + { + ((void (DYNACALL*)(u32))lps.mainloop)(CycleCount); + } + + //Emulate a single arm op, passed in opcode + //DYNACALL for ECX passing + + //FIXME: Move to diff file + /* + COND | 00 0 OP1 S Rn Rd SA ST 0 Rm -- Data opcode, PSR xfer (imm shifted reg) + | 00 0 OP1 S Rn Rd Rs 0 ST 1 Rm -- Data opcode, PSR xfer (reg shifted reg) + | 00 0 0 00A S Rd Rn Rs 1001 Rm -- Mult + | 00 0 1 0B0 0 Rn Rd 
0000 1001 Rm -- SWP + | 00 1 OP1 S Rn Rd imm8r4 -- Data opcode, PSR xfer (imm8r4) + + | 01 0 P UBW L Rn Rd Offset -- LDR/STR (I=0) + | 01 1 P UBW L Rn Rd SHAM SHTP 0 Rs -- LDR/STR (I=1) + | 10 0 P USW L Rn {RList} -- LDM/STM + | 10 1 L {offset} -- B/BL + | 11 1 1 X* -- SWI + + (undef cases) + | 01 1 XXXX X X* X* X* 1 XXXX - Undefined (LDR/STR w/ encodings that would be reg. based shift) + | 11 0 PUNW L Rn {undef} -- Copr. Data xfer (undef) + | 11 1 0 CPOP Crn Crd Cpn CP3 0 Crm -- Copr. Data Op (undef) + | 11 1 0 CPO3 L Crn Crd Cpn CP3 1 Crm -- Copr. Reg xf (undef) + + + Phase #1: + -Non branches that don't touch memory (pretty much: Data processing, Not MSR, Mult) + -Everything else is ifb + + Phase #2: + Move LDR/STR to templates + + Phase #3: + Move LDM/STM to templates + + + */ + + void AddDPOP(u32 subcd, u32 rflags, u32 wflags) + { + ArmDPOP op; + + u32 key = subcd << 21; + u32 mask = (15 << 21) | (7 << 25); + + op.flags = rflags | wflags; + + if (wflags == DP_W_F) + { + //also match S bit for opcodes that must write to flags (CMP & co) + mask |= 1 << 20; + key |= 1 << 20; + } + + //ISR form (bit 25=0, bit 4 = 0) + op.key = key; + op.mask = mask | (1 << 4); + ops.push_back(op); + + //RSR form (bit 25=0, bit 4 = 1, bit 7=0) + op.key = key | (1 << 4); + op.mask = mask | (1 << 4) | (1 << 7); + ops.push_back(op); + + //imm8r4 form (bit 25=1) + op.key = key | (1 << 25); + op.mask = mask; + ops.push_back(op); + } + + void InitHash() + { + /* + COND | 00 I OP1 S Rn Rd OPER2 -- Data opcode, PSR xfer + Data processing opcodes + */ + + //AND 0000 Rn, OPER2, {Flags} Rd, {Flags} + //EOR 0001 Rn, OPER2, {Flags} Rd, {Flags} + //SUB 0010 Rn, OPER2, {Flags} Rd, {Flags} + //RSB 0011 Rn, OPER2, {Flags} Rd, {Flags} + //ADD 0100 Rn, OPER2, {Flags} Rd, {Flags} + //ORR 1100 Rn, OPER2, {Flags} Rd, {Flags} + //BIC 1110 Rn, OPER2, {Flags} Rd, {Flags} + AddDPOP(0, DP_R_ROFC, DP_W_RFC); + AddDPOP(1, DP_R_ROFC, DP_W_RFC); + AddDPOP(2, DP_R_ROFC, DP_W_RFC); + AddDPOP(3, DP_R_ROFC, 
DP_W_RFC); + AddDPOP(4, DP_R_ROFC, DP_W_RFC); + AddDPOP(12, DP_R_ROFC, DP_W_RFC); + AddDPOP(14, DP_R_ROFC, DP_W_RFC); + + //ADC 0101 Rn, OPER2, Flags Rd, {Flags} + //SBC 0110 Rn, OPER2, Flags Rd, {Flags} + //RSC 0111 Rn, OPER2, Flags Rd, {Flags} + AddDPOP(5, DP_R_ROF, DP_W_RFC); + AddDPOP(6, DP_R_ROF, DP_W_RFC); + AddDPOP(7, DP_R_ROF, DP_W_RFC); + + //TST 1000 S=1 Rn, OPER2, Flags Flags + //TEQ 1001 S=1 Rn, OPER2, Flags Flags + AddDPOP(8, DP_R_ROF, DP_W_F); + AddDPOP(9, DP_R_ROF, DP_W_F); + + //CMP 1010 S=1 Rn, OPER2 Flags + //CMN 1011 S=1 Rn, OPER2 Flags + AddDPOP(10, DP_R_ROF, DP_W_F); + AddDPOP(11, DP_R_ROF, DP_W_F); + + //MOV 1101 OPER2, {Flags} Rd, {Flags} + //MVN 1111 OPER2, {Flags} Rd, {Flags} + AddDPOP(13, DP_R_OFC, DP_W_RFC); + AddDPOP(15, DP_R_OFC, DP_W_RFC); + } + + void* GetMemOp(bool L, bool B) + { + if (L) + { + if (B) + return (void*)(u32(DYNACALL*)(u32, Arm7Context*)) ctx->read8; + else + return (void*)(u32(DYNACALL*)(u32, Arm7Context*)) ctx->read32; + } + else + { + if (B) + return (void*)(u32(DYNACALL*)(u32, u32, Arm7Context*)) ctx->write8; + else + return (void*)(u32(DYNACALL*)(u32, u32, Arm7Context*)) ctx->write32; + } + } + + //Decodes an opcode, returns type. + //opcd might be changed (currently for LDM/STM -> LDR/STR transforms) + OpType DecodeOpcode(u32& opcd, u32& flags) + { + //by default, PC has to be updated + flags = OP_READS_PC; + + u32 CC = (opcd >> 28); + + if (CC != CC_AL) + flags |= OP_IS_COND; + + //helpers ... +#define CHK_BTS(M,S,V) ( (M & (opcd>>S)) == (V) ) //Check bits value in opcode +#define IS_LOAD (opcd & (1<<20)) //Is L bit set ? (LDM/STM LDR/STR) +#define READ_PC_CHECK(S) if (CHK_BTS(15,S,15)) flags|=OP_READS_PC; + +//Opcode sets pc ? 
+ bool _set_pc = + (CHK_BTS(3, 26, 0) && CHK_BTS(15, 12, 15)) || //Data processing w/ Rd=PC + (CHK_BTS(3, 26, 1) && CHK_BTS(15, 12, 15) && IS_LOAD) || //LDR/STR w/ Rd=PC + (CHK_BTS(7, 25, 4) && (opcd & 32768) && IS_LOAD) || //LDM/STM w/ PC in list + CHK_BTS(7, 25, 5) || //B or BL + CHK_BTS(15, 24, 15); //SWI + + //NV condition means VFP on newer cores, let interpreter handle it... + if (CC == 15) + return VOT_Fallback; + + if (_set_pc) + flags |= OP_SETS_PC; + + //B / BL ? + if (CHK_BTS(7, 25, 5)) + { + verify(_set_pc); + if (!(flags & OP_IS_COND)) + flags &= ~OP_READS_PC; //not COND doesn't read from pc + + flags |= OP_SETS_PC; //Branches Set pc .. + + //branch ! + return (opcd & (1 << 24)) ? VOT_BL : VOT_B; + } + + //Common case: MOVCC PC,REG + if (CHK_BTS(0xFFFFFF, 4, 0x1A0F00)) + { + verify(_set_pc); + if (CC == CC_AL) + flags &= ~OP_READS_PC; + + return VOT_BR; + } + + + //No support for COND branching opcodes apart from the forms above .. + if (CC != CC_AL && _set_pc) + { + return VOT_Fallback; + } + + u32 RList = opcd & 0xFFFF; + u32 Rn = (opcd >> 16) & 15; + +#define LDM_REGCNT() (cpuBitsSet[RList & 255] + cpuBitsSet[(RList >> 8) & 255]) + + + //Data Processing opcodes -- find using mask/key + //This will eventually be virtualised w/ register renaming + for (u32 i = 0; i < ops.size(); i++) + { + if (!_set_pc && ops[i].key == (opcd & ops[i].mask)) + { + //We fill in the cases that we have to read pc + flags &= ~OP_READS_PC; + + //Conditionals always need flags read ... + if ((opcd >> 28) != 0xE) + { + flags |= OP_HAS_FLAGS_READ; + //if (flags & OP_WRITE_REG) + flags |= OP_HAS_RD_READ; + } + + //DPOP ! 
+ + if ((ops[i].flags & OP_READ_FLAGS) || + ((ops[i].flags & OP_READ_FLAGS_S) && (opcd & (1 << 20)))) + { + flags |= OP_HAS_FLAGS_READ; + } + + if ((ops[i].flags & OP_WRITE_FLAGS) || + ((ops[i].flags & OP_WRITE_FLAGS_S) && (opcd & (1 << 20)))) + { + flags |= OP_HAS_FLAGS_WRITE; + } + + if (ops[i].flags & OP_WRITE_REG) + { + //All dpops that write, write to RD_12 + flags |= OP_HAS_RD_12; + verify(!(CHK_BTS(15, 12, 15) && CC != CC_AL)); + } + + if (ops[i].flags & OP_READ_REG_1) + { + //Reg 1 is RS_16 + flags |= OP_HAS_RS_16; + + //reads from pc ? + READ_PC_CHECK(16); + } + + //op2 is imm or reg ? + if (!(opcd & (1 << 25))) + { + //its reg (register or imm shifted) + flags |= OP_HAS_RS_0; + //reads from pc ? + READ_PC_CHECK(0); + + //is it register shifted reg ? + if (opcd & (1 << 4)) + { + verify(!(opcd & (1 << 7))); //must be zero + flags |= OP_HAS_RS_8; + //can't be pc ... + verify(!CHK_BTS(15, 8, 15)); + } + else + { + //is it RRX ? + if (((opcd >> 4) & 7) == 6) + { + //RRX needs flags to be read (even if the opcode doesn't) + flags |= OP_HAS_FLAGS_READ; + } + } + } + + return VOT_DataOp; + } + } + + //Lets try mem opcodes since its not data processing + + + + /* + Lets Check LDR/STR ! + + CCCC 01 0 P UBW L Rn Rd Offset -- LDR/STR (I=0) + */ + + if ((opcd >> 25) == (0xE4 / 2)) + { + /* + I=0 + + Everything else handled + */ + arm_printf("ARM: MEM %08X L/S:%d, AWB:%d!\n", opcd, (opcd >> 20) & 1, (opcd >> 21) & 1); + + return VOT_Read; + } + else if ((opcd >> 25) == (0xE6 / 2) && CHK_BTS(0x7, 4, 0)) + { + arm_printf("ARM: MEM REG to Reg %08X\n", opcd); + + /* + I=1 + + Logical Left shift, only + */ + return VOT_Read; + } + //LDM common case + else if ((opcd >> 25) == (0xE8 / 2) /*&& CHK_BTS(32768,0,0)*/ && CHK_BTS(1, 22, 0) && CHK_BTS(1, 20, 1) && LDM_REGCNT() == 1) + { + //P=0 + //U=1 + //L=1 + //W=1 + //S=0 + + u32 old_opcd = opcd; + + //One register xfered + //Can be rewriten as normal mem opcode .. 
+ opcd = 0xE4000000; + + //Imm offset + opcd |= 0 << 25; + //Post incr + opcd |= old_opcd & (1 << 24); + //Up/Dn + opcd |= old_opcd & (1 << 23); + //Word/Byte + opcd |= 0 << 22; + //Write back (must be 0 for PI) + opcd |= old_opcd & (1 << 21); + //Load + opcd |= old_opcd & (1 << 20); + + //Rn + opcd |= Rn << 16; + + //Rd + u32 Rd = findfirstset(RList) - 1; + opcd |= Rd << 12; + + //Offset + opcd |= 4; + + arm_printf("ARM: MEM TFX R %08X\n", opcd); + + return VOT_Read; + } + //STM common case + else if ((opcd >> 25) == (0xE8 / 2) && CHK_BTS(1, 22, 0) && CHK_BTS(1, 20, 0) && LDM_REGCNT() == 1) + { + //P=1 + //U=0 + //L=1 + //W=1 + //S=0 + + u32 old_opcd = opcd; + + //One register xfered + //Can be rewriten as normal mem opcode .. + opcd = 0xE4000000; + + //Imm offset + opcd |= 0 << 25; + //Pre/Post incr + opcd |= old_opcd & (1 << 24); + //Up/Dn + opcd |= old_opcd & (1 << 23); + //Word/Byte + opcd |= 0 << 22; + //Write back + opcd |= old_opcd & (1 << 21); + //Store/Load + opcd |= old_opcd & (1 << 20); + + //Rn + opcd |= Rn << 16; + + //Rd + u32 Rd = findfirstset(RList) - 1; + opcd |= Rd << 12; + + //Offset + opcd |= 4; + + arm_printf("ARM: MEM TFX W %08X\n", opcd); + + return VOT_Read; + } + else if (CHK_BTS(0xE10F0FFF, 0, 0xE10F0000)) + { + return VOT_MRS; + } + else if (CHK_BTS(0xEFBFFFF0, 0, 0xE129F000)) + { + return VOT_MSR; + } + else if ((opcd >> 25) == (0xE8 / 2) && CHK_BTS(32768, 0, 0)) + { + arm_printf("ARM: MEM FB %08X\n", opcd); + flags |= OP_MFB; //(flag Just for the fallback counters) + } + else + { + arm_printf("ARM: FB %08X\n", opcd); + } + + //by default fallback to interpr + return VOT_Fallback; + } + + //very quick-and-dirty register rename based virtualisation + u32 renamed_regs[16]; + u32 rename_reg_base; + + void RenameRegReset() + { + rename_reg_base = r1; + memset(renamed_regs, 0, sizeof(renamed_regs)); + } + + //returns new reg #. 
didrn is true if a rename mapping was added + u32 RenameReg(u32 regn, bool& didrn) + { + if (renamed_regs[regn] == 0) + { + renamed_regs[regn] = rename_reg_base; + rename_reg_base++; + didrn = true; + } + else + { + didrn = false; + } + + return renamed_regs[regn]; + } + + //For reg reads (they need to be loaded) + //load can be used to skip loading (for RD if not cond) + void LoadAndRename(u32& opcd, u32 bitpos, bool load, u32 pc) + { + bool didrn; + u32 reg = (opcd >> bitpos) & 15; + + u32 nreg = RenameReg(reg, didrn); + + opcd = (opcd & ~(15 << bitpos)) | (nreg << bitpos); + + if (load && didrn) + { + if (reg == 15) + armv->MOV32((eReg)nreg, pc); + else + armv->LoadReg((eReg)nreg, reg); + } + } + + //For results store (they need to be stored) + void StoreAndRename(u32 opcd, u32 bitpos) + { + bool didrn; + u32 reg = (opcd >> bitpos) & 15; + + u32 nreg = RenameReg(reg, didrn); + + verify(!didrn); + + if (reg == 15) + reg = R15_ARM_NEXT; + + armv->StoreReg((eReg)nreg, reg); + } + + //Virtualise Data Processing opcode + void VirtualizeOpcode(u32 opcd, u32 flag, u32 pc) + { + //Keep original opcode for info + u32 orig = opcd; + + //Load arm flags, RS0/8/16, RD12/16 (as indicated by the decoder flags) + + if (flag & OP_HAS_FLAGS_READ) + { + armv->LoadFlags(); + } + + auto pc_pipeline_offset = (flag & OP_HAS_RS_8) ? 12 : 8; + + if (flag & OP_HAS_RS_0) + LoadAndRename(opcd, 0, true, pc + pc_pipeline_offset); //8 or 12 + if (flag & OP_HAS_RS_8) + LoadAndRename(opcd, 8, true, pc + 8); // always 8 + if (flag & OP_HAS_RS_16) + LoadAndRename(opcd, 16, true, pc + pc_pipeline_offset); // 8 or 12 + + if (flag & OP_HAS_RD_12) + LoadAndRename(opcd, 12, flag & OP_HAS_RD_READ, pc + 4); + + if (flag & OP_HAS_RD_16) + { + verify(!(flag & OP_HAS_RS_16)); + LoadAndRename(opcd, 16, flag & OP_HAS_RD_READ, pc + 4); + } + + //Opcode has been modified to use the new regs + //Emit it ... 
+ arm_printf("Arm Virtual: %08X -> %08X\n", orig, opcd); + armEmit32(opcd); + + //Store arm flags, rd12/rd16 (as indicated by the decoder flags) + if (flag & OP_HAS_RD_12) + StoreAndRename(orig, 12); + + if (flag & OP_HAS_RD_16) + StoreAndRename(orig, 16); + + //Sanity check .. + if (renamed_regs[15] != 0) + { + verify(flag & OP_READS_PC || (flag & OP_SETS_PC && !(flag & OP_IS_COND))); + } + + if (flag & OP_HAS_FLAGS_WRITE) + armv->StoreFlags(); + } + + void DumpRegs(const char* output) + { + static FILE* f = fopen(output, "w"); + static int id = 0; +#if 0 + if (490710 == id) + { + __asm int 3; + } +#endif + verify(id != 137250); +#if 1 + fprintf(f, "%d\n", id); + //for(int i=0;i<14;i++) + { + int i = R15_ARM_NEXT; + fprintf(f, "r%d=%08X\n", i, arm_reg[i].I); + } +#endif + id++; + } + + void DYNACALL PrintOp(u32 opcd) + { + printf("%08X\n", opcd); + } + + +#undef r + + /* + TODO: + R15 read/writing is kind of .. weird + Gotta investigate why .. + */ + + //Mem operand 2 calculation, if Reg or large imm + void MemOperand2(eReg dst, bool I, bool U, u32 offs, u32 opcd) + { + if (I == true) + { + u32 Rm = (opcd >> 0) & 15; + verify(CHK_BTS(7, 4, 0));// only SHL mode + armv->LoadReg(r1, Rm); + u32 SA = 31 & (opcd >> 7); + //can't do shifted add for now -- EMITTER LIMIT -- + if (SA) + armv->lsl(r1, r1, SA); + } + else + { + armv->MOV32(r1, offs); + } + + if (U) + armv->add(dst, r0, r1); + else + armv->sub(dst, r0, r1); + } + + + + //Compile & run block of code, starting armNextPC + void CompileCode() + { + //setup local pc counter + u32 pc = armNextPC; + + //emitter/block setup + if (!armv->setup()) { + printf("ARM7: ICache is full, invalidate old entries ... 
(%08X)\n", pc); //ifdebug + InvalidateJitCache(); + return; + } + + arm_printf("ARM7: Compiling block @ %08X\n", pc); + + verify(virtBackend == nullptr); + + virtBackend = armv.get(); + + //Get the code ptr + void* rv = armv->armGetEmitPtr(); + + //update the block table + verify(EntryPoints[(armNextPC & ctx->aram_mask) / 4] == lps.compilecode); + + EntryPoints[(armNextPC & ctx->aram_mask) / 4] = rv; + + //the ops counter is used to terminate the block (max op count for a single block is 32 currently) + //We don't want too long blocks for timing accuracy + u32 ops = 0; + + u32 Cycles = 0; + + for (;;) + { + ops++; + + //Read opcode ... + u32 opcd = CPUReadMemoryQuick(pc); + +#if HOST_CPU==CPU_X86 + //Sanity check: Stale cache + armv->check_cache(opcd, pc); +#endif + + u32 op_flags; + + //Decode & handle opcode + + OpType opt = DecodeOpcode(opcd, op_flags); + + switch (opt) + { + case VOT_DataOp: + { + //data processing opcode that can be virtualised + RenameRegReset(); + + /* + if (op_flags & OP_READS_PC) + armv->imm_to_reg(15,pc+8); + + else*/ +#if HOST_CPU==CPU_X86 + armv->imm_to_reg(15, rand()); +#endif + + VirtualizeOpcode(opcd, op_flags, pc); + +#if HOST_CPU==CPU_X86 + armv->imm_to_reg(15, rand()); +#endif + } + break; + + case VOT_BR: + { + //Branch to reg + ConditionCode cc = (ConditionCode)(opcd >> 28); + + verify(op_flags & OP_SETS_PC); + + if (cc != CC_AL) + { + armv->LoadFlags(); + armv->imm_to_reg(R15_ARM_NEXT, pc + 4); + } + + armv->LoadReg(r0, opcd & 0xF); + + armv->bic(r0, r0, 3); + + void* ref = armv->start_conditional(cc); + armv->StoreReg(r0, R15_ARM_NEXT, cc); + armv->end_conditional(ref); + } + break; + + case VOT_B: + case VOT_BL: + { + //Branch to imm + + //<<2, sign extend ! 
+ s32 offs = ((s32)opcd << 8) >> 6; + + if (op_flags & OP_IS_COND) + { + armv->imm_to_reg(R15_ARM_NEXT, pc + 4); + armv->LoadFlags(); + ConditionCode cc = (ConditionCode)(opcd >> 28); + void* ref = armv->start_conditional(cc); + if (opt == VOT_BL) + { + armv->MOV32(r0, pc + 4); + armv->StoreReg(r0, 14, cc); + } + + armv->MOV32(r0, pc + 8 + offs); + armv->StoreReg(r0, R15_ARM_NEXT, cc); + armv->end_conditional(ref); + } + else + { + if (opt == VOT_BL) + armv->imm_to_reg(14, pc + 4); + + armv->imm_to_reg(R15_ARM_NEXT, pc + 8 + offs); + } + } + break; + + case VOT_Read: + { + //LDR/STR + + u32 offs = opcd & 4095; + bool U = opcd & (1 << 23); + bool Pre = opcd & (1 << 24); + + bool W = opcd & (1 << 21); + bool I = opcd & (1 << 25); + + u32 Rn = (opcd >> 16) & 15; + u32 Rd = (opcd >> 12) & 15; + + bool DoWB = W || (!Pre && Rn != Rd); //Write back if: W, Post update w/ Rn!=Rd + bool DoAdd = DoWB || Pre; + + //Register not updated anyway + if (I == false && offs == 0) + { + DoWB = false; + DoAdd = false; + } + + //verify(Rd!=15); + verify(!((Rn == 15) && DoWB)); + + //AGU + if (Rn != 15) + { + armv->LoadReg(r0, Rn); + + if (DoAdd) + { + eReg dst = Pre ? r0 : armv->GetSafeReg(); + armv->LoadReg(dst, Rn); + + if (I == false && is_i8r4(offs)) + { + if (U) + armv->add(dst, r0, offs); + else + armv->add(dst, r0, -offs); + } + else + { + MemOperand2(dst, I, U, offs, opcd); + } + + if (DoWB && dst == r0) + armv->mov(armv->GetSafeReg(), r0); + } + } + else + { + u32 addr = pc + 8; + + if (Pre && offs && I == false) + { + addr += U ? 
offs : -offs; + } + + armv->MOV32(r0, addr); + + if (Pre && I == true) + { + MemOperand2(r1, I, U, offs, opcd); + armv->add(r0, r0, r1); + } + } + + if (CHK_BTS(1, 20, 0)) + { + if (Rd == 15) + { + armv->MOV32(r1, pc + 12); + } + else + { + armv->LoadReg(r1, Rd); + } + } + + //Call handler + if (CHK_BTS(1, 20, 1)) { + armv->MOVPTR(r1, (uintptr_t)ctx); + armv->call(GetMemOp(CHK_BTS(1, 20, 1), CHK_BTS(1, 22, 1)), 2, 1); + } + else + { + armv->MOVPTR(r2, (uintptr_t)ctx); + armv->call(GetMemOp(CHK_BTS(1, 20, 1), CHK_BTS(1, 22, 1)), 3, 0); + } + + if (CHK_BTS(1, 20, 1)) + { + if (CHK_BTS(1, 22, 1)) { + armv->zxtb(r0, r0); + } + + if (Rd == 15) + { + verify(op_flags & OP_SETS_PC); + armv->StoreReg(r0, R15_ARM_NEXT); + } + else + { + armv->StoreReg(r0, Rd); + } + } + + //Write back from AGU, if any + if (DoWB && Rn != Rd) + { + armv->StoreReg(armv->GetSafeReg(), Rn); + } + } + break; + + case VOT_MRS: + { + u32 Rd = (opcd >> 12) & 15; + + armv->MOVPTR(r0, (uintptr_t)ctx); + armv->call((void*)ARM7Backend::CPUUpdateCPSR, 1, 0); + + if (opcd & (1 << 22)) + { + armv->LoadReg(r0, 17); + } + else + { + armv->LoadReg(r0, 16); + } + + armv->StoreReg(r0, Rd); + } + break; + + case VOT_MSR: + { + u32 Rm = (opcd >> 0) & 15; + + //FIXME PARAM 0 + armv->MOVPTR(r0, (uintptr_t)ctx); + armv->LoadReg(r1, Rm); + if (opcd & (1 << 22)) + armv->call((void*)(void (DYNACALL*)(u32)) & MSR_do<1>, 2, 0); + else + armv->call((void*)(void (DYNACALL*)(u32)) & MSR_do<0>, 2, 0); + + if (op_flags & OP_SETS_PC) + armv->imm_to_reg(R15_ARM_NEXT, pc + 4); + } + break; + + case VOT_Fallback: + { + //interpreter fallback + + //arm_single_op needs PC+4 on r15 + //TODO: only write it if needed -> Probably not worth the code, very few fallbacks now... 
+ armv->imm_to_reg(15, pc + 8); + + //For cond branch, MSR + if (op_flags & OP_SETS_PC) + armv->imm_to_reg(R15_ARM_NEXT, pc + 4); + +#if HOST_CPU==CPU_X86 + if (!(op_flags & OP_SETS_PC)) + armv->imm_to_reg(R15_ARM_NEXT, pc + 4); +#endif + + armv->intpr(opcd); + +#if HOST_CPU==CPU_X86 + if (!(op_flags & OP_SETS_PC)) + { + //Sanity check: next pc + armv->check_pc(pc + 4); +#if 0 + x86e->Emit(op_mov32, ECX, opcd); + x86e->Emit(op_call, x86_ptr_imm(PrintOp)); +#endif + } +#endif + } + break; + + default: + die("can't happen\n"); + } + + //Lets say each opcode takes 9 cycles for now .. + Cycles += 9; + +#if HOST_CPU==CPU_X86 + armv->imm_to_reg(15, 0xF87641FF); + + armv->prof(opt, opcd, op_flags); +#endif + + //Branch ? + if (op_flags & OP_SETS_PC) + { + //x86e->Emit(op_call,x86_ptr_imm(DumpRegs)); // great debugging tool + arm_printf("ARM: %06X: Block End %d\n", pc, ops); + +#if HOST_CPU==CPU_X86 && 0 + //Great fallback finder, also spams console + if (opt == VOT_Fallback) + { + x86e->Emit(op_mov32, ECX, opcd); + x86e->Emit(op_call, x86_ptr_imm(PrintOp)); + } +#endif + break; + } + + //block size limit ? 
+ if (ops > 32) + { + arm_printf("ARM: %06X: Block split %d\n", pc, ops); + + armv->imm_to_reg(R15_ARM_NEXT, pc + 4); + break; + } + + //Goto next opcode + pc += 4; + } + + armv->end(&lps, (void*)rv, Cycles); + + verify(virtBackend == armv.get()); + virtBackend = nullptr; + } + + + + void InvalidateJitCache() + { + armv->InvalidateJitCache(); + + printf("ARM7: Invalidating cache\n"); + for (u32 i = 0; i < ARRAY_SIZE(EntryPoints); i++) + EntryPoints[i] = lps.compilecode; + } + + void armt_init() + { + InitHash(); + } + + void* GetEntrypointBase() { + return EntryPoints; + } +}; + +ARM7Backend* ARM7Backend::CreateJit(Arm7Context* ctx) { + return new Arm7JitVirt_impl(ctx); +} + +void DYNACALL CompileCode(Arm7JitVirt_impl* arm) { + arm->CompileCode(); +} + +#endif \ No newline at end of file diff --git a/libswirl/hw/arm7/arm7_jit_virt_arm32.cpp b/libswirl/hw/arm7/arm7_jit_virt_arm32.cpp new file mode 100644 index 0000000000..e75e5e829a --- /dev/null +++ b/libswirl/hw/arm7/arm7_jit_virt_arm32.cpp @@ -0,0 +1,479 @@ +#include "types.h" + +#if HOST_CPU == CPU_ARM && FEAT_AREC == DYNAREC_JIT + +#if HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN +#include +#endif + +#if HOST_OS == OS_WINDOWS +#include +#endif + + +#include "arm7.h" +#include "arm7_context.h" +#include "arm7_jit_virt_backend.h" + +#include "virt_arm.h" + +#include "deps/vixl/aarch32/macro-assembler-aarch32.h" + + +const u32 ICacheSize = 1024 * 1024; +#if HOST_OS == OS_WINDOWS +u8 ARM7_TCB[ICacheSize + 4096]; +#elif HOST_OS == OS_LINUX + +u8 ARM7_TCB[ICacheSize + 4096] __attribute__((section(".text"))); + +#elif HOST_OS==OS_DARWIN +u8 ARM7_TCB[ICacheSize + 4096] __attribute__((section("__TEXT, .text"))); +#else +#error ARM7_TCB ALLOC +#endif + + + +using namespace vixl::aarch32; + + +#define arm_reg ctx->regs +#define armMode ctx->armMode + +#define armNextPC arm_reg[R15_ARM_NEXT].I +#define CPUReadMemoryQuick(addr) (*(u32*)&ctx->aica_ram[addr&ctx->aram_mask]) + +#if HOST_OS==OS_DARWIN +#include +extern "C" void 
armFlushICache(void* code, void* pEnd) { + sys_dcache_flush(code, (u8*)pEnd - (u8*)code + 1); + sys_icache_invalidate(code, (u8*)pEnd - (u8*)code + 1); +} +#else +extern "C" void armFlushICache(void* bgn, void* end) { +#if defined(_ANDROID) + __clear_cache(bgn, end); +#else + __builtin___clear_cache(bgn, end); +#endif +} +#endif + +/* arm backend + * based on the virt-x86 one + * shares lots of code with it +*/ +struct Arm7VirtBackendArm32 : Arm7VirtBackend { + MacroAssembler* assembler; + ARM7Backend* arm; + Arm7Context* ctx; + + u8* ICache; + u8* icPtr_Base; + u8* icPtr; + + Arm7VirtBackendArm32(ARM7Backend* arm, Arm7Context* ctx) : arm(arm), ctx(ctx) { + + //align to next page .. + ICache = (u8*)(((unat)ARM7_TCB + 4095) & ~4095); + +#if HOST_OS==OS_DARWIN + //Can't just mprotect on iOS + munmap(ICache, ICacheSize); + ICache = (u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0); +#endif + +#if HOST_OS == OS_WINDOWS + DWORD old; + VirtualProtect(ICache, ICacheSize, PAGE_EXECUTE_READWRITE, &old); +#elif HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN + + printf("\n\t ARM7_TCB addr: %p | from: %p | addr here: %p\n", ICache, ARM7_TCB, &ARM7Backend::singleOp); + + if (mprotect(ICache, ICacheSize, PROT_EXEC | PROT_READ | PROT_WRITE)) + { + perror("\n\tError - Couldn�t mprotect ARM7_TCB!"); + verify(false); + } + +#if TARGET_IPHONE + memset((u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0), 0xFF, ICacheSize); +#else + memset(ICache, 0xFF, ICacheSize); +#endif + +#endif + + icPtr = ICache; + } + + ARM::eReg GetSafeReg() { +#if HOST_OS == OS_DARWIN + return (ARM::eReg)11; +#else + return (ARM::eReg)9; +#endif + } + + void GenerateLooppoints(Looppoints* lp) { + void* codestart = icPtr; + assembler = new MacroAssembler(icPtr, ICache + ICacheSize - icPtr, A32); + + // generate the loop points here + + Label dispatch; + assembler->Bind(&dispatch); + lp->dispatch = 
assembler->GetCursorAddress(); + // arm_dispatch + { + + /* + ldrd r0, r1, [r8, #184] @load : Next PC, interrupt + + @ TODO : FIX THIS TO NOT BE STATIC / CODEGEN on INIT +#if INTERNAL_ARAM_SIZE == 2*1024*1024 + ubfx r2, r0, #2, #19 @ assuming 2 MB address space max(21 bits) +#elif INTERNAL_ARAM_SIZE == 8*1024*1024 + ubfx r2, r0, #2, #21 @ assuming 8 MB address space max(23 bits) +#else +#error Unsupported AICA RAM size +#endif +*/ + assembler->Ldrd(r0, r1, MemOperand(r8, 184)); + if ((ctx->aram_mask + 1) == 2 * 1024 * 1024) { + assembler->Ubfx(r2, r0, 2, 19); + } + else if ((ctx->aram_mask + 1) == 8 * 1024 * 1024) { + assembler->Ubfx(r2, r0, 2, 21); + } + else { + die("Unsupported AICA RAM size"); + } + + /* +cmp r1, #0 +bne arm_dofiq + +ldr pc, [r4, r2, lsl #2] +*/ + Label arm_dofiq; + assembler->Cmp(r1, 0); + assembler->B(ne, &arm_dofiq); + assembler->Add(r2, r4, Operand(r2, LSL, 2)); + assembler->Ldr(pc, MemOperand(r2)); + + + /* + arm_dofiq: + bl CSYM(CPUFiq) + b CSYM(arm_dispatch) + */ + assembler->Bind(&arm_dofiq); + assembler->Mov(r0, (uintptr_t)ctx); + ptrdiff_t offset = reinterpret_cast(&ARM7Backend::CPUFiq) - assembler->GetBuffer()->GetStartAddress(); + Label CPUFiq_label; + assembler->BindToOffset(&CPUFiq_label, offset); + assembler->Bl(&CPUFiq_label); + assembler->B(&dispatch); + } + + lp->mainloop = assembler->GetCursorAddress(); + // arm_mainloop + { + + + /* + #if HOST_OS == OS_DARWIN + push { + r4, r5, r8, r11, lr + } + #else + push{ r4,r5,r8,r9,lr } + #endif + */ + assembler->Push(RegisterList(r4, r5, r8, lr)); + assembler->Push(RegisterList(r4,Register(GetSafeReg()))); + + /* + #ifdef TARGET_IPHONE + ldr r8, Xarm_Reg @load cntx + ldr r4, XEntryPoints @load lookup base + #else + mov r8, r1 @load cntx + mov r4, r2 @load lookup base + #endif + + ldr r5, [r8, #192] @load cycle count + add r5, r0 @add cycles for this timeslice + + */ + + assembler->Mov(r8, (uintptr_t)ctx); + assembler->Mov(r4, (uintptr_t)arm->GetEntrypointBase()); + + 
assembler->Ldr(r5, MemOperand(r8, 192)); + assembler->Add(r5, r5, r0); + + //b CSYM(arm_dispatch) + assembler->B(&dispatch); + } + + lp->compilecode = assembler->GetCursorAddress(); + // arm_compilecode + { + assembler->Mov(r0, (uintptr_t)arm); + ptrdiff_t offset = reinterpret_cast(&CompileCode) - assembler->GetBuffer()->GetStartAddress(); + Label CompileCode_label; + assembler->BindToOffset(&CompileCode_label, offset); + assembler->Bl(&CompileCode_label); + + assembler->B(&dispatch); + } + + lp->exit = assembler->GetCursorAddress(); + // arm_exit + { + /* + CSYM(arm_exit) : + str r5, [r8, #192] @if timeslice is over, save remaining cycles +#if HOST_OS == OS_DARWIN + pop{ r4,r5,r8,r11,pc } +#else + pop{ r4,r5,r8,r9,pc } +#endif + */ + //assembler->Brk(0); + assembler->Str(r5, MemOperand(r8, 192)); + assembler->Pop(RegisterList(r4, Register(GetSafeReg()))); + assembler->Pop(RegisterList(r4, r5, r8, pc)); + } + + + // cleanup + + assembler->FinalizeCode(); + verify(assembler->GetBuffer()->GetCursorOffset() <= assembler->GetBuffer()->GetCapacity()); + armFlushICache(codestart, assembler->GetCursorAddress()); + + icPtr += assembler->GetBuffer()->GetSizeInBytes(); + icPtr_Base = icPtr; + + delete assembler; + assembler = nullptr; + } + + MemOperand arm_reg_operand(u32 regn) + { + return MemOperand(r8, (u8*)&ctx->regs[regn].I - (u8*)&ctx->regs[0].I); + } + + void* armGetEmitPtr() + { + if (icPtr < (ICache + ICacheSize - 1024)) //ifdebug + return static_cast(icPtr); + + return NULL; + } + + //helpers ... 
+ void LoadReg(ARM::eReg rd, u32 regn, ARM::ConditionCode cc = ARM::CC_AL) + { + assembler->Ldr(Condition(cc), Register(rd), arm_reg_operand(regn)); + } + + void StoreReg(ARM::eReg rd, u32 regn, ARM::ConditionCode cc = ARM::CC_AL) + { + assembler->Str(Condition(cc), Register(rd), arm_reg_operand(regn)); + } + + void* start_conditional(ARM::ConditionCode cc) + { + return nullptr; + } + + void end_conditional(void* ref) + { + + } + + // FIXME IMPL + //For COND + void LoadFlags() + { + //Load flags + LoadReg(ARM::r0, RN_PSR_FLAGS); + //move them to flags register + assembler->Msr(MaskedSpecialRegister(APSR_nzcvq), r0); + } + + // FIXME IMPL + void StoreFlags() + { + //get results from flags register + assembler->Mrs(r1, SpecialRegister(APSR)); + //Store flags + StoreReg(ARM::r1, RN_PSR_FLAGS); + } + + void imm_to_reg(u32 regn, u32 imm) + { + assembler->Mov(r0, imm); + assembler->Str(r0, arm_reg_operand(regn)); + } + + void call(void* loc, int params, int returns) + { + ptrdiff_t offset = reinterpret_cast(loc) - assembler->GetBuffer()->GetStartAddress(); + Label function_label; + assembler->BindToOffset(&function_label, offset); + assembler->Bl(&function_label); + } + + + bool setup() + { + if (icPtr >= (ICache + ICacheSize - 64 * 1024)) { + return false; + } + + assembler = new MacroAssembler(icPtr, ICache + ICacheSize - icPtr, A32); + + return true; + } + + void intpr(u32 opcd) + { + //Call interpreter + assembler->Mov(r0, (uintptr_t)ctx); + assembler->Mov(r1, opcd); + call((void*)&ARM7Backend::singleOp, 1, 1); + } + + + void end(Looppoints* lp, void* codestart, u32 cycl) + { + //Normal block end + //cycle counter rv + + //pop registers & return + assembler->Subs(r5, r5, cycl); + ptrdiff_t offset = reinterpret_cast(lp->exit) - assembler->GetBuffer()->GetStartAddress(); + Label arm_exit_label; + assembler->BindToOffset(&arm_exit_label, offset); + assembler->B(mi, &arm_exit_label); //statically predicted as not taken + + offset = reinterpret_cast(lp->dispatch) - 
assembler->GetBuffer()->GetStartAddress(); + Label arm_dispatch_label; + assembler->BindToOffset(&arm_dispatch_label, offset); + assembler->B(&arm_dispatch_label); + + assembler->FinalizeCode(); + verify(assembler->GetBuffer()->GetCursorOffset() <= assembler->GetBuffer()->GetCapacity()); + armFlushICache(codestart, assembler->GetCursorAddress()); + + icPtr += assembler->GetBuffer()->GetSizeInBytes(); + +#if 0 + Instruction* instr_start = (Instruction*)codestart; + Instruction* instr_end = assembler->GetBuffer()->GetEndAddress(); + Decoder decoder; + Disassembler disasm; + decoder.AppendVisitor(&disasm); + Instruction* instr; + for (instr = instr_start; instr < instr_end; instr += kInstructionSize) { + decoder.Decode(instr); + printf("arm64 arec\t %p:\t%s\n", + reinterpret_cast(instr), + disasm.GetOutput()); + } +#endif + delete assembler; + assembler = NULL; + } + + //Hook cus varm misses this, so x86 needs special code + void MOVPTR(ARM::eReg regn, uintptr_t imm) + { + assembler->Mov(Register(regn), imm); + } + + void MOV32(ARM::eReg regn, u32 imm) + { + assembler->Mov(Register(regn), imm); + } + + + void mov(ARM::eReg regd, ARM::eReg regn) + { + assembler->Mov(Register(regd), Register(regn)); + } + + virtual void sxtb(ARM::eReg regd, ARM::eReg regs) + { + assembler->Sxtb(Register(regd), Register(regs)); + } + + virtual void zxtb(ARM::eReg regd, ARM::eReg regs) + { + assembler->And(Register(regd), Register(regs), 0xFF); + } + + + void add(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) + { + assembler->Add(Register(regd), Register(regn), Register(regm)); + } + + void sub(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) + { + assembler->Sub(Register(regd), Register(regn), Register(regm)); + } + + void add(ARM::eReg regd, ARM::eReg regn, s32 imm) + { + assembler->Add(Register(regd), Register(regn), imm); + } + + void lsl(ARM::eReg regd, ARM::eReg regn, u32 imm) + { + assembler->Lsl(Register(regd), Register(regn), imm); + } + + void bic(ARM::eReg regd, ARM::eReg regn, 
u32 imm) + { + assembler->Bic(Register(regd), Register(regn), imm); + } + + void Emit32(u32 emit32) + { + assembler->EmitA32(emit32); + } + + //sanity check: non branch doesn't set pc + void check_pc(u32 pc) + { + + } + + //sanity check: stale cache + void check_cache(u32 opcd, u32 pc) + { + + } + + //profiler hook + void prof(OpType opt, u32 op, u32 flags) + { + + } + + void InvalidateJitCache() { + icPtr = icPtr_Base; + } +}; + +Arm7VirtBackend* Arm7VirtBackend::Create(ARM7Backend* arm, Arm7Context* ctx) { + return new Arm7VirtBackendArm32(arm, ctx); +} +#endif diff --git a/libswirl/hw/arm7/arm7_jit_virt_arm64.cpp b/libswirl/hw/arm7/arm7_jit_virt_arm64.cpp new file mode 100644 index 0000000000..c653ecc100 --- /dev/null +++ b/libswirl/hw/arm7/arm7_jit_virt_arm64.cpp @@ -0,0 +1,771 @@ +/* + Copyright 2019 flyinghead + + This file is part of reicast. + + reicast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + reicast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with reicast. If not, see . 
+ */ + +#include "build.h" +#if HOST_CPU == CPU_ARM64 && FEAT_AREC != DYNAREC_NONE + +#include +#include "arm7.h" +#include "arm7_context.h" +#include "arm7_jit_virt_backend.h" +#include "jit/emitter/arm32/arm_coding.h" +#include "deps/vixl/aarch64/macro-assembler-aarch64.h" + +#if HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN +#include +#endif +#if HOST_OS == OS_WINDOWS +#include +#endif + + +using namespace vixl::aarch64; +//#include "deps/vixl/aarch32/disasm-aarch32.h" + +extern void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end); + + +const u32 ICacheSize = 1024 * 1024; +#if HOST_OS == OS_WINDOWS +u8 ARM7_TCB[ICacheSize + 4096]; +#elif HOST_OS == OS_LINUX + +u8 ARM7_TCB[ICacheSize + 4096] __attribute__((section(".text"))); + +#elif HOST_OS==OS_DARWIN +u8 ARM7_TCB[ICacheSize + 4096] __attribute__((section("__TEXT, .text"))); +#else +#error ARM7_TCB ALLOC +#endif + + + +extern "C" void armFlushICache(void *bgn, void *end) { + vmem_platform_flush_cache(bgn, end, bgn, end); +} + +struct Arm7JitArm7VirtBackendArm64 : Arm7VirtBackend { + + MacroAssembler* assembler; + ARM7Backend* arm; + Arm7Context* ctx; + + u8* ICache; + u8* icPtr_Base; + u8* icPtr; + + + Arm7JitArm7VirtBackendArm64(ARM7Backend* arm, Arm7Context* ctx) : arm(arm), ctx(ctx) { + + //align to next page .. 
+ ICache = (u8*)(((unat)ARM7_TCB + 4095) & ~4095); + +#if HOST_OS==OS_DARWIN + //Can't just mprotect on iOS + munmap(ICache, ICacheSize); + ICache = (u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0); +#endif + +#if HOST_OS == OS_WINDOWS + DWORD old; + VirtualProtect(ICache, ICacheSize, PAGE_EXECUTE_READWRITE, &old); +#elif HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN + + printf("\n\t ARM7_TCB addr: %p | from: %p | addr here: %p\n", ICache, ARM7_TCB, &ARM7Backend::singleOp); + + if (mprotect(ICache, ICacheSize, PROT_EXEC | PROT_READ | PROT_WRITE)) + { + perror("\n\tError - Couldn't mprotect ARM7_TCB!"); + verify(false); + } + +#if TARGET_IPHONE + memset((u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0), 0xFF, ICacheSize); +#else + memset(ICache, 0xFF, ICacheSize); +#endif + +#endif + + icPtr = ICache; + } + + ARM::eReg GetSafeReg() { + return (ARM::eReg)25; + } + + void GenerateLooppoints(Looppoints* lp) { + void* codestart = icPtr; + assembler = new MacroAssembler(icPtr, ICache + ICacheSize - icPtr); + + // generate the loop points here + + lp->dispatch = assembler->GetCursorAddress(); + // arm_dispatch + { + + //"arm_dispatch: \n\t" + //"ldp w0, w1, [x28, #184] \n\t" // load Next PC, interrupt + assembler->Ldp(w0, w1, MemOperand(x28, 184)); + if ((ctx->aram_mask + 1) == 2 * 1024 * 1024) { + //"ubfx w2, w0, #2, #19 \n\t" // w2 = pc >> 2. Note: assuming address space == 2 MB (21 bits) + assembler->Ubfx(w2, w0, 2, 19); + } + else if ((ctx->aram_mask + 1) == 8 * 1024 * 1024) { + //"ubfx w2, w0, #2, #21 \n\t" // w2 = pc >> 2. 
Note: assuming address space == 8 MB (23 bits) + assembler->Ubfx(w2, w0, 2, 21); + } + else { + die("Unsupported AICA RAM size"); + } + + Label arm_dofiq; + //"cbnz w1, arm_dofiq \n\t" // if interrupt pending, handle it + assembler->Cbnz(w1, &arm_dofiq); + //"add x2, x26, x2, lsl #3 \n\t" // x2 = EntryPoints + pc << 1 + assembler->Add(x2, x26, Operand(x2, LSL, 3)); + //"ldr x3, [x2] \n\t" + assembler->Ldr(x3, MemOperand(x2)); + //"br x3 \n" + assembler->Br(x3); + + assembler->Bind(&arm_dofiq); + //"arm_dofiq: \n\t" + // mov x0, ctx + assembler->Mov(x0, (uintptr_t)ctx); + //"bl CPUFiq \n\t" + ptrdiff_t offset = reinterpret_cast(&ARM7Backend::CPUFiq) - assembler->GetBuffer()->GetStartAddress(); + Label CPUFiq_label; + assembler->BindToOffset(&CPUFiq_label, offset); + assembler->Bl(&CPUFiq_label); + //"b arm_dispatch \n\t" + offset = reinterpret_cast(lp->dispatch) - assembler->GetBuffer()->GetStartAddress(); + Label arm_dispatch_label; + assembler->BindToOffset(&arm_dispatch_label, offset); + assembler->B(&arm_dispatch_label); + } + + lp->mainloop = assembler->GetCursorAddress(); + // arm_mainloop + { + //"stp x25, x26, [sp, #-48]! 
\n\t" + assembler->Stp(x25, x26, MemOperand(sp, -48, PreIndex)); + //"stp x27, x28, [sp, #16] \n\t" + assembler->Stp(x27, x28, MemOperand(sp, 16)); + //"stp x29, x30, [sp, #32] \n\t" + assembler->Stp(x29, x30, MemOperand(sp, 32)); + + //"mov x28, x1 \n\t" // arm7 registers + assembler->Mov(x28, (uintptr_t)ctx); + //"mov x26, x2 \n\t" // lookup base + assembler->Mov(x26, (uintptr_t)arm->GetEntrypointBase()); + + //"ldr w27, [x28, #192] \n\t" // cycle count + assembler->Ldr(w27, MemOperand(x28, 192)); + //"add w27, w27, w0 \n" // add cycles for this timeslice + assembler->Add(w27, w27, w0); + + //"b arm_dispatch \n\t" + ptrdiff_t offset = reinterpret_cast(lp->dispatch) - assembler->GetBuffer()->GetStartAddress(); + Label arm_dispatch_label; + assembler->BindToOffset(&arm_dispatch_label, offset); + assembler->B(&arm_dispatch_label); + } + + lp->compilecode = assembler->GetCursorAddress(); + // arm_compilecode + { + assembler->Mov(x0, (uintptr_t)arm); + ptrdiff_t offset = reinterpret_cast(&CompileCode) - assembler->GetBuffer()->GetStartAddress(); + Label CompileCode_label; + assembler->BindToOffset(&CompileCode_label, offset); + assembler->Bl(&CompileCode_label); + offset = reinterpret_cast(lp->dispatch) - assembler->GetBuffer()->GetStartAddress(); + Label arm_dispatch_label; + assembler->BindToOffset(&arm_dispatch_label, offset); + assembler->B(&arm_dispatch_label); + } + + lp->exit = assembler->GetCursorAddress(); + // arm_exit + { + //assembler->Brk(0); + //"str w27, [x28, #192] \n\t" // if timeslice is over, save remaining cycles + assembler->Str(w27, MemOperand(x28, 192)); + //"ldp x29, x30, [sp, #32] \n\t" + assembler->Ldp(x29, x30, MemOperand(sp, 32)); + //"ldp x27, x28, [sp, #16] \n\t" + assembler->Ldp(x27, x28, MemOperand(sp, 16)); + //"ldp x25, x26, [sp], #48 \n\t" + assembler->Ldp(x25, x26, MemOperand(sp, 48, PostIndex)); + //"ret \n" + assembler->Ret(); + } + + + // cleanup + + assembler->FinalizeCode(); + verify(assembler->GetBuffer()->GetCursorOffset() <= 
assembler->GetBuffer()->GetCapacity()); + vmem_platform_flush_cache( + codestart, assembler->GetCursorAddress(), + codestart, assembler->GetCursorAddress()); + + icPtr += assembler->GetBuffer()->GetSizeInBytes(); + icPtr_Base = icPtr; + + delete assembler; + assembler = nullptr; + } + + MemOperand arm_reg_operand(u32 regn) + { + return MemOperand(x28, (u8*)&ctx->regs[regn].I - (u8*)&ctx->regs[0].I); + } + + void* armGetEmitPtr() + { + if (icPtr < (ICache + ICacheSize - 1024)) //ifdebug + return static_cast(icPtr); + + return NULL; + } + //helpers ... + void LoadReg(ARM::eReg rd, u32 regn, ARM::ConditionCode cc = ARM::CC_AL) + { + assembler->Ldr(Register::GetWRegFromCode(rd), arm_reg_operand(regn)); + } + void StoreReg(ARM::eReg rd, u32 regn, ARM::ConditionCode cc = ARM::CC_AL) + { + assembler->Str(Register::GetWRegFromCode(rd), arm_reg_operand(regn)); + } + + void* start_conditional(ARM::ConditionCode cc) + { + if (cc == ARM::CC_AL) + return NULL; + Label* label = new Label(); + verify(cc <= ARM::CC_LE); + Condition condition = (Condition)((u32)cc ^ 1); + assembler->B(label, condition); + + return label; + } + + void end_conditional(void* ref) + { + if (ref != NULL) + { + Label* label = (Label*)ref; + assembler->Bind(label); + delete label; + } + } + + //For COND + void LoadFlags() + { + //Load flags + LoadReg(ARM::r0, RN_PSR_FLAGS); + //move them to flags register + assembler->Msr(NZCV, x0); + } + + void StoreFlags() + { + //get results from flags register + assembler->Mrs(x1, NZCV); + //Store flags + StoreReg(ARM::r1, RN_PSR_FLAGS); + } + + void imm_to_reg(u32 regn, u32 imm) + { + assembler->Mov(w0, imm); + assembler->Str(w0, arm_reg_operand(regn)); + } + + void call(void* loc, int params, int returns) + { + ptrdiff_t offset = reinterpret_cast(loc) - assembler->GetBuffer()->GetStartAddress(); + Label function_label; + assembler->BindToOffset(&function_label, offset); + assembler->Bl(&function_label); + } + + bool setup() + { + if (icPtr >= (ICache + ICacheSize - 
64 * 1024)) { + return false; + } + + assembler = new MacroAssembler(icPtr, ICache + ICacheSize - icPtr); + + return true; + } + + void intpr(u32 opcd) + { + //Call interpreter + assembler->Mov(x0, (uintptr_t)ctx); + assembler->Mov(w1, opcd); + call((void*)&ARM7Backend::singleOp, 1, 1); + } + + void end(Looppoints* lp, void* codestart, u32 cycl) + { + //Normal block end + //cycle counter rv + + //pop registers & return + assembler->Subs(w27, w27, cycl); + ptrdiff_t offset = reinterpret_cast(lp->exit) - assembler->GetBuffer()->GetStartAddress(); + Label arm_exit_label; + assembler->BindToOffset(&arm_exit_label, offset); + assembler->B(&arm_exit_label, mi); //statically predicted as not taken + + offset = reinterpret_cast(lp->dispatch) - assembler->GetBuffer()->GetStartAddress(); + Label arm_dispatch_label; + assembler->BindToOffset(&arm_dispatch_label, offset); + assembler->B(&arm_dispatch_label); + + assembler->FinalizeCode(); + verify(assembler->GetBuffer()->GetCursorOffset() <= assembler->GetBuffer()->GetCapacity()); + vmem_platform_flush_cache( + codestart, assembler->GetCursorAddress(), + codestart, assembler->GetCursorAddress()); + icPtr += assembler->GetBuffer()->GetSizeInBytes(); + +#if 0 + Instruction* instr_start = (Instruction*)codestart; + Instruction* instr_end = assembler->GetCursorAddress(); + Decoder decoder; + Disassembler disasm; + decoder.AppendVisitor(&disasm); + Instruction* instr; + for (instr = instr_start; instr < instr_end; instr += kInstructionSize) { + decoder.Decode(instr); + printf("arm64 arec\t %p:\t%s\n", + reinterpret_cast(instr), + disasm.GetOutput()); + } +#endif + delete assembler; + assembler = NULL; + } + + //Hook cus varm misses this, so x86 needs special code + void MOVPTR(ARM::eReg regn, uintptr_t imm) + { + assembler->Mov(Register::GetXRegFromCode(regn), imm); + } + + void MOV32(ARM::eReg regn, u32 imm) + { + assembler->Mov(Register::GetWRegFromCode(regn), imm); + } + + + void mov(ARM::eReg regd, ARM::eReg regn) + { + 
assembler->Mov(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn)); + } + + virtual void sxtb(ARM::eReg regd, ARM::eReg regs) + { + assembler->Sxtb(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regs)); + } + + virtual void zxtb(ARM::eReg regd, ARM::eReg regs) + { + assembler->And(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regs), 0xFF); + } + + + void add(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) + { + assembler->Add(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), Register::GetWRegFromCode(regm)); + } + + void sub(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) + { + assembler->Sub(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), Register::GetWRegFromCode(regm)); + } + + void add(ARM::eReg regd, ARM::eReg regn, s32 imm) + { + assembler->Add(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), imm); + } + + void lsl(ARM::eReg regd, ARM::eReg regn, u32 imm) + { + assembler->Lsl(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), imm); + } + + void bic(ARM::eReg regd, ARM::eReg regn, u32 imm) + { + assembler->Bic(Register::GetWRegFromCode(regd), Register::GetWRegFromCode(regn), imm); + } + + class android_buf : public std::stringbuf + { + public: + virtual int sync() override { + printf("ARM7: %s\n", this->str().c_str()); + str(""); + + return 0; + } + }; + + void Emit32(u32 opcode) + { +#if 0 + if (opcode != 0x00011001) + { + android_buf buffer; + std::ostream cout(&buffer); + vixl::aarch32::PrintDisassembler disasm(cout, 0); + disasm.DecodeA32(opcode); + cout.flush(); + } +#endif + + const Register& rd = Register::GetWRegFromCode((opcode >> 12) & 15); + const Register& rn = Register::GetWRegFromCode((opcode >> 16) & 15); + bool set_flags = opcode & (1 << 20); + Operand op2; + int op_type = (opcode >> 21) & 15; + bool logical_op = op_type == 0 || op_type == 1 || op_type == 8 || op_type == 9 // AND, EOR, TST, TEQ + || op_type == 12 || op_type == 13 || op_type 
== 15 || op_type == 14; // ORR, MOV, MVN, BIC + bool set_carry_bit = false; + + ARM::ConditionCode condition = (ARM::ConditionCode)(opcode >> 28); + void* cond_op_label = start_conditional(condition); + + if (opcode & (1 << 25)) + { + // op2 is imm8r4 + u32 rotate = ((opcode >> 8) & 15) << 1; + u32 imm8 = opcode & 0xff; + op2 = Operand((imm8 >> rotate) | (imm8 << (32 - rotate))); + } + else + { + // op2 is register + const Register& rm = Register::GetWRegFromCode(opcode & 15); + + Shift shift = (Shift)((opcode >> 5) & 3); + + if (opcode & (1 << 4)) + { + // shift by register + // FIXME Carry must be set based on shift/rotate + //if (set_flags && logical_op) + // die("shift by register with set flags C - not implemented"); + const Register& shift_reg = Register::GetWRegFromCode((opcode >> 8) & 15); + + Label shift_by_32_label; + + switch (shift) + { + case LSL: + case LSR: + assembler->Mrs(x0, NZCV); + assembler->Cmp(shift_reg, 32); + if (shift == LSL) + assembler->Lsl(w15, rm, shift_reg); + else + assembler->Lsr(w15, rm, shift_reg); + assembler->Csel(w15, 0, w15, ge); // LSL and LSR by 32 or more gives 0 + assembler->Msr(NZCV, x0); + break; + case ASR: + assembler->Mrs(x0, NZCV); + assembler->Cmp(shift_reg, 32); + assembler->Asr(w15, rm, shift_reg); + assembler->Sbfx(w13, rm, 31, 1); + assembler->Csel(w15, w13, w15, ge); // ASR by 32 or more gives 0 or -1 depending on operand sign + assembler->Msr(NZCV, x0); + break; + case ROR: + assembler->Ror(w15, rm, shift_reg); + break; + default: + die("Invalid shift"); + break; + } + op2 = Operand(w15); + } + else + { + // shift by immediate + u32 shift_imm = (opcode >> 7) & 0x1f; + if (shift != ROR && shift_imm != 0 && !(set_flags && logical_op)) + { + op2 = Operand(rm, shift, shift_imm); + } + else if (shift_imm == 0) + { + if (shift == LSL) + { + op2 = Operand(rm); // LSL 0 is a no-op + } + else + { + // Shift by 32 + if (set_flags && logical_op) + set_carry_bit = true; + if (shift == LSR) + { + if (set_flags && 
logical_op) + assembler->Ubfx(w14, rm, 31, 1); // w14 = rm[31] + assembler->Mov(w15, 0); // w15 = 0 + } + else if (shift == ASR) + { + if (set_flags && logical_op) + assembler->Ubfx(w14, rm, 31, 1); // w14 = rm[31] + assembler->Sbfx(w15, rm, 31, 1); // w15 = rm < 0 ? -1 : 0 + } + else if (shift == ROR) + { + // RRX + assembler->Cset(w14, cs); // w14 = C + assembler->Mov(w15, Operand(rm, LSR, 1)); // w15 = rm >> 1 + assembler->Bfi(w15, w14, 31, 1); // w15[31] = C + if (set_flags && logical_op) + assembler->Ubfx(w14, rm, 0, 1); // w14 = rm[0] (new C) + } + else + die("Invalid shift"); + op2 = Operand(w15); + } + } + else + { + // Carry must be preserved or Ror shift + if (set_flags && logical_op) + set_carry_bit = true; + if (shift == LSL) + { + assembler->Ubfx(w14, rm, 32 - shift_imm, 1); // w14 = rm[lsb] + assembler->Lsl(w15, rm, shift_imm); // w15 <<= shift + } + else + { + if (set_flags && logical_op) + assembler->Ubfx(w14, rm, shift_imm - 1, 1); // w14 = rm[msb] + + if (shift == LSR) + assembler->Lsr(w15, rm, shift_imm); // w15 >>= shift + else if (shift == ASR) + assembler->Asr(w15, rm, shift_imm); + else if (shift == ROR) + assembler->Ror(w15, rm, shift_imm); + else + die("Invalid shift"); + } + op2 = Operand(w15); + } + } + } + if (!set_carry_bit + && (op_type == 8 || op_type == 9 // TST and TEQ always set flags + || (logical_op && set_flags))) + { + // Logical ops should only affect the carry bit based on the op2 shift + // Here we're not shifting so the carry bit should be preserved + set_carry_bit = true; + assembler->Cset(w14, cs); + } + + switch (op_type) + { + case 0: // AND + if (set_flags) + assembler->Ands(rd, rn, op2); + else + assembler->And(rd, rn, op2); + break; + case 1: // EOR + assembler->Eor(rd, rn, op2); + if (set_flags) + assembler->Tst(rd, rd); + break; + case 2: // SUB + if (set_flags) + assembler->Subs(rd, rn, op2); + else + assembler->Sub(rd, rn, op2); + break; + case 3: // RSB + assembler->Neg(w0, rn); + if (set_flags) + 
assembler->Adds(rd, w0, op2); + else + assembler->Add(rd, w0, op2); + break; + case 4: // ADD + if (set_flags) + assembler->Adds(rd, rn, op2); + else + assembler->Add(rd, rn, op2); + break; + case 12: // ORR + assembler->Orr(rd, rn, op2); + if (set_flags) + assembler->Tst(rd, rd); + break; + case 14: // BIC + if (set_flags) + assembler->Bics(rd, rn, op2); + else + assembler->Bic(rd, rn, op2); + break; + case 5: // ADC + if (set_flags) + assembler->Adcs(rd, rn, op2); + else + assembler->Adc(rd, rn, op2); + break; + case 6: // SBC + if (set_flags) + assembler->Sbcs(rd, rn, op2); + else + assembler->Sbc(rd, rn, op2); + break; + case 7: // RSC + assembler->Ngc(w0, rn); + if (set_flags) + assembler->Adds(rd, w0, op2); + else + assembler->Add(rd, w0, op2); + break; + case 8: // TST + assembler->Tst(rn, op2); + break; + case 9: // TEQ + assembler->Eor(w0, rn, op2); + assembler->Tst(w0, w0); + break; + case 10: // CMP + assembler->Cmp(rn, op2); + break; + case 11: // CMN + assembler->Cmn(rn, op2); + break; + case 13: // MOV + assembler->Mov(rd, op2); + if (set_flags) + assembler->Tst(rd, rd); + break; + case 15: // MVN + assembler->Mvn(rd, op2); + if (set_flags) + assembler->Tst(rd, rd); + break; + } + if (set_carry_bit) + { + assembler->Mrs(x0, NZCV); + assembler->Bfi(x0, x14, 29, 1); // C is bit 29 in NZCV + assembler->Msr(NZCV, x0); + } + end_conditional(cond_op_label); + } + + + //sanity check: non branch doesn't set pc + virtual void check_pc(u32 pc) { } + + //sanity check: stale cache + virtual void check_cache(u32 opcd, u32 pc) { } + + //profiler hook + virtual void prof(OpType opt, u32 op, u32 flags) { } + + virtual void InvalidateJitCache() { + icPtr = icPtr_Base; + } +}; + +Arm7VirtBackend* Arm7VirtBackend::Create(ARM7Backend* arm, Arm7Context* ctx) { + return new Arm7JitArm7VirtBackendArm64(arm, ctx); +} +#if FIXME_ARM7JIT +// +// Dynarec main loop +// +// w25 is used for temp mem save (post increment op2) +// x26 is the entry points table +// w27 is the cycle 
counter +// x28 points to the arm7 registers base +__asm__( + ".globl arm_compilecode \n\t" + ".hidden arm_compilecode \n" + "arm_compilecode: \n\t" + "bl CompileCode \n\t" + "b arm_dispatch \n\t" + + ".globl arm_mainloop \n\t" + ".hidden arm_mainloop \n" + "arm_mainloop: \n\t" // arm_mainloop(cycles, regs, entry points) + "stp x25, x26, [sp, #-48]! \n\t" + "stp x27, x28, [sp, #16] \n\t" + "stp x29, x30, [sp, #32] \n\t" + + "mov x28, x1 \n\t" // arm7 registers + "mov x26, x2 \n\t" // lookup base + + "ldr w27, [x28, #192] \n\t" // cycle count + "add w27, w27, w0 \n" // add cycles for this timeslice + + ".globl arm_dispatch \n\t" + ".hidden arm_dispatch \n" + "arm_dispatch: \n\t" + "ldp w0, w1, [x28, #184] \n\t" // load Next PC, interrupt +#if ARAM_SIZE == 2*1024*1024 + "ubfx w2, w0, #2, #19 \n\t" // w2 = pc >> 2. Note: assuming address space == 2 MB (21 bits) +#elif ARAM_SIZE == 8*1024*1024 + "ubfx w2, w0, #2, #21 \n\t" // w2 = pc >> 2. Note: assuming address space == 8 MB (23 bits) +#else +#error Unsupported AICA RAM size +#endif + "cbnz w1, arm_dofiq \n\t" // if interrupt pending, handle it + + "add x2, x26, x2, lsl #3 \n\t" // x2 = EntryPoints + pc << 1 + "ldr x3, [x2] \n\t" + "br x3 \n" + + "arm_dofiq: \n\t" + "bl CPUFiq \n\t" + "b arm_dispatch \n\t" + + ".globl arm_exit \n\t" + ".hidden arm_exit \n" + "arm_exit: \n\t" + "str w27, [x28, #192] \n\t" // if timeslice is over, save remaining cycles + "ldp x29, x30, [sp, #32] \n\t" + "ldp x27, x28, [sp, #16] \n\t" + "ldp x25, x26, [sp], #48 \n\t" + "ret \n" +); +#endif // ARM64 +#endif diff --git a/libswirl/hw/arm7/arm7_jit_virt_backend.h b/libswirl/hw/arm7/arm7_jit_virt_backend.h new file mode 100644 index 0000000000..7cac4025ec --- /dev/null +++ b/libswirl/hw/arm7/arm7_jit_virt_backend.h @@ -0,0 +1,120 @@ +#pragma once + +#include "types.h" +#include "jit/emitter/arm32/arm_coding.h" + +enum OpType +{ + VOT_Fallback, + VOT_DataOp, + VOT_B, + VOT_BL, + VOT_BR, //Branch (to register) + VOT_Read, //Actually, this 
handles LDR and STR + //VOT_LDM, //This Isn't used anymore + VOT_MRS, + VOT_MSR, +}; + + +enum OpFlags +{ + OP_SETS_PC = 1, + OP_READS_PC = 32768, + OP_IS_COND = 65536, + OP_MFB = 0x80000000, + + OP_HAS_RD_12 = 2, + OP_HAS_RD_16 = 4, + OP_HAS_RS_0 = 8, + OP_HAS_RS_8 = 16, + OP_HAS_RS_16 = 32, + OP_HAS_FLAGS_READ = 4096, + OP_HAS_FLAGS_WRITE = 8192, + OP_HAS_RD_READ = 16384, //For conditionals + + OP_WRITE_FLAGS = 64, + OP_WRITE_FLAGS_S = 128, + OP_READ_FLAGS = 256, + OP_READ_FLAGS_S = 512, + OP_WRITE_REG = 1024, + OP_READ_REG_1 = 2048, +}; + + +struct Looppoints +{ + void* compilecode; + void* mainloop; + void* dispatch; + void* exit; +}; + +struct ARM7Backend; +struct Arm7Context; + +struct Arm7VirtBackend { + + static Arm7VirtBackend* Create(ARM7Backend* arm, Arm7Context* ctx); + + virtual void GenerateLooppoints(Looppoints* lp) = 0; + + virtual void* armGetEmitPtr() = 0; + + virtual void LoadReg(ARM::eReg rd, u32 regn, ARM::ConditionCode cc = ARM::CC_AL) = 0; + virtual void StoreReg(ARM::eReg rd, u32 regn, ARM::ConditionCode cc = ARM::CC_AL) = 0; + + virtual void LoadFlags() = 0; + + virtual void StoreFlags() = 0; + + virtual void mov(ARM::eReg regd, ARM::eReg regn) = 0; + + virtual void sxtb(ARM::eReg regd, ARM::eReg regs) = 0; + + virtual void zxtb(ARM::eReg regd, ARM::eReg regs) = 0; + + virtual void add(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) = 0; + + virtual void sub(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) = 0; + + virtual void add(ARM::eReg regd, ARM::eReg regn, s32 imm) = 0; + + virtual void lsl(ARM::eReg regd, ARM::eReg regn, u32 imm) = 0; + + virtual void bic(ARM::eReg regd, ARM::eReg regn, u32 imm) = 0; + + virtual void* start_conditional(ARM::ConditionCode cc) = 0; + virtual void end_conditional(void* ref) = 0; + + virtual void imm_to_reg(u32 regn, u32 imm) = 0; + + virtual void MOV32(ARM::eReg regn, u32 imm) = 0; + virtual void MOVPTR(ARM::eReg regn, uintptr_t imm) = 0; + + virtual void call(void* loc, int params, int returns) = 0; + 
+ virtual bool setup() = 0; + + virtual void intpr(u32 opcd) = 0; + + virtual void end(Looppoints* lp, void* codestart, u32 cycles) = 0; + + //sanity check: non branch doesn't set pc + virtual void check_pc(u32 pc) = 0; + + //sanity check: stale cache + virtual void check_cache(u32 opcd, u32 pc) = 0; + + //profiler hook + virtual void prof(OpType opt, u32 op, u32 flags) = 0; + + virtual void Emit32(u32 emit32) = 0; + + virtual void InvalidateJitCache() = 0; + + virtual ARM::eReg GetSafeReg() = 0; +}; + +struct Arm7JitVirt_impl; +void DYNACALL CompileCode(Arm7JitVirt_impl* arm); \ No newline at end of file diff --git a/libswirl/hw/arm7/arm7_jit_virt_x86.cpp b/libswirl/hw/arm7/arm7_jit_virt_x86.cpp new file mode 100644 index 0000000000..dcdc8e1237 --- /dev/null +++ b/libswirl/hw/arm7/arm7_jit_virt_x86.cpp @@ -0,0 +1,438 @@ +#include "types.h" + +#if HOST_CPU == CPU_X86 && FEAT_AREC == DYNAREC_JIT + +#if HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN +#include +#endif +#if HOST_OS == OS_WINDOWS +#include +#endif + + +void armEmit32(u32 emit32); +void* armGetEmitPtr(); + + +#define _DEVEL (1) +#define EMIT_I armEmit32((I)) +#define EMIT_GET_PTR() armGetEmitPtr() + +#include "jit/emitter/arm32/arm_emitter.h" + +#include "arm7.h" +#include "arm7_context.h" +#include "arm7_jit_virt_backend.h" + +#include "virt_arm.h" + + +#include "jit/emitter/x86/x86_emitter.h" + +u8* ARM::emit_opt = 0; +ARM::eReg ARM::reg_addr; +ARM::eReg ARM::reg_dst; +s32 ARM::imma; + + +const u32 ICacheSize = 1024 * 1024; +#if HOST_OS == OS_WINDOWS +u8 ARM7_TCB[ICacheSize + 4096]; +#elif HOST_OS == OS_LINUX + +u8 ARM7_TCB[ICacheSize + 4096] __attribute__((section(".text"))); + +#elif HOST_OS==OS_DARWIN +u8 ARM7_TCB[ICacheSize + 4096] __attribute__((section("__TEXT, .text"))); +#else +#error ARM7_TCB ALLOC +#endif + + + +using namespace ARM; + + +#define arm_reg ctx->regs +#define armMode ctx->armMode + +#define armNextPC arm_reg[R15_ARM_NEXT].I +#define CPUReadMemoryQuick(addr) 
(*(u32*)&ctx->aica_ram[addr&ctx->aram_mask]) + +//profiler +u32 nfb, ffb, bfb, mfb; + +/* X86 backend + * Uses a mix of + * x86 code + * Virtualised arm code (using the varm interpreter) + * Emulated arm fallbacks (using the aica arm interpreter) + * + * The goal is to run as much code possible under the varm interpreter + * so it will run on arm w/o changes. A few opcodes are missing from varm + * (MOV32 is a notable case) and as such i've added a few varm_* hooks + * + * This code also performs a LOT of compiletime and runtime state/value sanity checks. + * We don't care for speed here ... +*/ +struct Arm7VirtBackendX86 : Arm7VirtBackend { + x86_block* x86e; + ARM7Backend* arm; + Arm7Context* ctx; + + u8* ICache; + u8* icPtr_Base; + u8* icPtr; + + Arm7VirtBackendX86(ARM7Backend* arm, Arm7Context* ctx) : arm(arm), ctx(ctx) { + + //align to next page .. + ICache = (u8*)(((unat)ARM7_TCB + 4095) & ~4095); + +#if HOST_OS==OS_DARWIN + //Can't just mprotect on iOS + munmap(ICache, ICacheSize); + ICache = (u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0); +#endif + +#if HOST_OS == OS_WINDOWS + DWORD old; + VirtualProtect(ICache, ICacheSize, PAGE_EXECUTE_READWRITE, &old); +#elif HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN + + printf("\n\t ARM7_TCB addr: %p | from: %p | addr here: %p\n", ICache, ARM7_TCB, armt_init); + + if (mprotect(ICache, ICacheSize, PROT_EXEC | PROT_READ | PROT_WRITE)) + { + perror("\n\tError - Couldn�t mprotect ARM7_TCB!"); + verify(false); + } + +#if TARGET_IPHONE + memset((u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0), 0xFF, ICacheSize); +#else + memset(ICache, 0xFF, ICacheSize); +#endif + +#endif + + icPtr = ICache; + } + + ARM::eReg GetSafeReg() { + return r3; + } + + void GenerateLooppoints(Looppoints* lp) { + x86e = new x86_block(); + + x86e->Init(0, 0); + x86e->x86_buff = (u8*)EMIT_GET_PTR(); + x86e->x86_size = 1024 * 64; + 
x86e->do_realloc = false; + + lp->dispatch = &x86e->x86_buff[x86e->x86_indx]; + // arm_dispatch + { + x86e->Emit(op_mov32, EAX, &arm_reg[R15_ARM_NEXT].I); + x86e->Emit(op_and32, EAX, ctx->aram_mask); + x86e->Emit(op_cmp32, &arm_reg[INTR_PEND].I, 0); + + x86_Label* dofiq = x86e->CreateLabel(false, 8); + x86e->Emit(op_jne, dofiq); + x86e->Emit(op_jmp32, x86_mrm(EAX, arm->GetEntrypointBase())); + + + x86e->MarkLabel(dofiq); + x86e->Emit(op_mov32, ECX, (uintptr_t)ctx); + x86e->Emit(op_call, x86_ptr_imm(ARM7Backend::CPUFiq)); + x86e->Emit(op_jmp, x86_ptr_imm(lp->dispatch)); + } + + lp->mainloop = &x86e->x86_buff[x86e->x86_indx]; + // arm_mainloop + { + x86e->Emit(op_push32, ESI); + x86e->Emit(op_mov32, ESI, ECX); + x86e->Emit(op_add32, ESI, &arm_reg[CYCL_CNT].I); + + x86e->Emit(op_mov32, EAX, 0); + x86e->Emit(op_jmp, x86_ptr_imm(lp->dispatch)); + } + + lp->compilecode = &x86e->x86_buff[x86e->x86_indx]; + // arm_compilecode + { + x86e->Emit(op_mov32, ECX, (uintptr_t)arm); + x86e->Emit(op_call, x86_ptr_imm(CompileCode)); + x86e->Emit(op_mov32, EAX, 0); + x86e->Emit(op_jmp, x86_ptr_imm(lp->dispatch)); + } + + lp->exit = &x86e->x86_buff[x86e->x86_indx]; + // arm_exit + { + x86e->Emit(op_mov32, &arm_reg[CYCL_CNT].I, ESI); + x86e->Emit(op_pop32, ESI); + x86e->Emit(op_ret); + } + + //Generate the code & apply fixups/relocations as needed + x86e->Generate(); + + //Use space from the dynarec buffer + icPtr += x86e->x86_indx; + + // Set the reset position + icPtr_Base = icPtr; + + //Delete the x86 emitter ... 
+ delete x86e; + } + + void* armGetEmitPtr() + { + if (icPtr < (ICache + ICacheSize - 1024)) //ifdebug + return static_cast(icPtr); + + return NULL; + } + + void LoadReg(eReg rd, u32 regn, ConditionCode cc = CC_AL) + { + LDR(rd, r8, (u8*)&arm_reg[regn].I - (u8*)&arm_reg[0].I, Offset, cc); + } + void StoreReg(eReg rd, u32 regn, ConditionCode cc = CC_AL) + { + STR(rd, r8, (u8*)&arm_reg[regn].I - (u8*)&arm_reg[0].I, Offset, cc); + } + + void LoadFlags() + { + //Load flags + LoadReg(r0, RN_PSR_FLAGS); + //move them to flags register + MSR(0, 8, r0); + } + + void StoreFlags() + { + //get results from flags register + MRS(r1, 0); + //Store flags + StoreReg(r1, RN_PSR_FLAGS); + } + + void mov(ARM::eReg regd, ARM::eReg regn) + { + MOV(regd, regn); + } + + void sxtb(ARM::eReg regd, ARM::eReg regs) + { + x86e->Emit(op_movsx8to32, EAX, &virt_arm_reg(regs)); + x86e->Emit(op_mov32, &virt_arm_reg(regd), EAX); + } + + void zxtb(ARM::eReg regd, ARM::eReg regs) + { + x86e->Emit(op_movzx8to32, EAX, &virt_arm_reg(regs)); + x86e->Emit(op_mov32, &virt_arm_reg(regd), EAX); + } + + void add(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) + { + ADD(regd, regn, regm); + } + + void sub(ARM::eReg regd, ARM::eReg regn, ARM::eReg regm) + { + SUB(regd, regn, regm); + } + + void add(ARM::eReg regd, ARM::eReg regn, s32 imm) + { + if (imm >= 0) + ADD(regd, regn, imm); + else + SUB(regd, regn, -imm); + } + + void lsl(ARM::eReg regd, ARM::eReg regn, u32 imm) + { + LSL(regd, regn, imm); + } + + void bic(ARM::eReg regd, ARM::eReg regn, u32 imm) + { + BIC(regd, regn, imm); + } + + void* start_conditional(ARM::ConditionCode cc) + { + return NULL; + } + void end_conditional(void* ref) + { + } + + + void imm_to_reg(u32 regn, u32 imm) + { + x86e->Emit(op_mov32, &arm_reg[regn].I, imm); + } + + void MOVPTR(eReg regn, uintptr_t imm) + { + x86e->Emit(op_mov32, &virt_arm_reg(regn), imm); + } + + void MOV32(eReg regn, u32 imm) + { + x86e->Emit(op_mov32, &virt_arm_reg(regn), imm); + } + + void call(void* loc, int 
params, int returns) + { + verify(params <= 3); + + if (params) { + if (params >= 1) { + x86e->Emit(op_mov32, ECX, &virt_arm_reg(0)); + } + if (params >= 2) { + x86e->Emit(op_mov32, EDX, &virt_arm_reg(1)); + } + if (params >= 3) { + x86e->Emit(op_push32, x86_ptr(&virt_arm_reg(2))); + } + + } + + x86e->Emit(op_call, x86_ptr_imm(loc)); + + if (returns) { + x86e->Emit(op_mov32, &virt_arm_reg(0), EAX); + } + } + + x86_Label* end_lbl; + + bool setup() + { + if (icPtr >= (ICache + ICacheSize - 64 * 1024)) { + return false; + } + + //Setup emitter + x86e = new x86_block(); + x86e->Init(0, 0); + x86e->x86_buff = (u8*)EMIT_GET_PTR(); + + verify(x86e->x86_buff != nullptr); + + x86e->x86_size = 1024 * 64; + x86e->do_realloc = false; + + + //load base reg .. + x86e->Emit(op_mov32, &virt_arm_reg(8), (u32)&arm_reg[0]); + + //the "end" label is used to exit from the block, if a code modification (expected opcode // actual opcode in ram) is detected + end_lbl = x86e->CreateLabel(false, 0); + + return true; + } + + void intpr(u32 opcd) + { + //Call interpreter + x86e->Emit(op_mov32, ECX, (uintptr_t)ctx); + x86e->Emit(op_mov32, EDX, opcd); + x86e->Emit(op_call, x86_ptr_imm(&ARM7Backend::singleOp)); + } + + void end(Looppoints* lp, void* codestart, u32 cycles) + { + //Normal block end + //Move counter to EAX for return, pop ESI, ret + x86e->Emit(op_sub32, ESI, cycles); + x86e->Emit(op_jns, x86_ptr_imm(lp->dispatch)); + x86e->Emit(op_jmp, x86_ptr_imm(lp->exit)); + + //Fluch cache, move counter to EAX, pop, ret + //this should never happen (triggers a breakpoint on x86) + x86e->MarkLabel(end_lbl); + x86e->Emit(op_int3); + x86e->Emit(op_int3); + //x86e->Emit(op_call, x86_ptr_imm(FlushCache)); + x86e->Emit(op_sub32, ESI, cycles); + x86e->Emit(op_jmp, x86_ptr_imm(lp->dispatch)); + + //Generate the code & apply fixups/relocations as needed + x86e->Generate(); + + //Use space from the dynarec buffer + icPtr += x86e->x86_indx; + + //Delete the x86 emitter ... 
+ delete x86e; + } + + //sanity check: non branch doesn't set pc + void check_pc(u32 pc) + { + x86e->Emit(op_cmp32, &armNextPC, pc); + x86_Label* nof = x86e->CreateLabel(false, 0); + x86e->Emit(op_je, nof); + x86e->Emit(op_int3); + x86e->Emit(op_int3); + x86e->Emit(op_int3); + x86e->MarkLabel(nof); + } + + //sanity check: stale cache + void check_cache(u32 opcd, u32 pc) + { + x86e->Emit(op_cmp32, &CPUReadMemoryQuick(pc), opcd); + x86_Label* nof = x86e->CreateLabel(false, 0); + x86e->Emit(op_je, nof); + x86e->Emit(op_int3); + x86e->MarkLabel(nof); + } + + //profiler hook + void prof(OpType opt, u32 op, u32 flags) + { + if (VOT_Fallback != opt) + x86e->Emit(op_add32, &nfb, 1); + else + { + if (flags & OP_SETS_PC) + x86e->Emit(op_add32, &bfb, 1); + else if (flags & OP_MFB) + x86e->Emit(op_add32, &mfb, 1); + else + x86e->Emit(op_add32, &ffb, 1); + } + } + + void Emit32(u32 emit32) + { + if (icPtr >= (ICache + ICacheSize - 1024)) { + die("ICache is full, invalidate old entries ..."); //ifdebug + } + + x86e->Emit(op_mov32, ECX, emit32); + x86e->Emit(op_call, x86_ptr_imm(virt_arm_op)); + } + + void InvalidateJitCache() { + icPtr = icPtr_Base; + } +}; + +Arm7VirtBackend* Arm7VirtBackend::Create(ARM7Backend* arm, Arm7Context* ctx) { + return new Arm7VirtBackendX86(arm, ctx); +} +#endif \ No newline at end of file diff --git a/libswirl/hw/arm7/arm_mem.cpp b/libswirl/hw/arm7/arm_mem.cpp index 0dd0f84154..252ae1e5dc 100644 --- a/libswirl/hw/arm7/arm_mem.cpp +++ b/libswirl/hw/arm7/arm_mem.cpp @@ -3,87 +3,4 @@ #include "types.h" #include "libswirl.h" - -#define REG_L (0x2D00) -#define REG_M (0x2D04) - -//Set to true when aica interrupt is pending -bool aica_interr=false; -u32 aica_reg_L=0; -//Set to true when the out of the intc is 1 -bool e68k_out = false; -u32 e68k_reg_L; -u32 e68k_reg_M=0; //constant ? - -void update_e68k() -{ - if (!e68k_out && aica_interr) - { - //Set the pending signal - //Is L register held here too ? 
- e68k_out=1; - e68k_reg_L=aica_reg_L; - - update_armintc(); - } -} - -void libARM_InterruptChange(u32 bits,u32 L) -{ - aica_interr=bits!=0; - if (aica_interr) - aica_reg_L=L; - update_e68k(); -} - -void e68k_AcceptInterrupt() -{ - e68k_out=false; - update_e68k(); - update_armintc(); -} - -//Reg reads from arm side .. -template -T arm_ReadReg(u32 addr) -{ - addr&=0x7FFF; - if (addr==REG_L) - return e68k_reg_L; - else if(addr==REG_M) - return e68k_reg_M; //shouldn't really happen - else - return g_AICA->ReadReg(addr,sz); -} -template -void arm_WriteReg(u32 addr,T data) -{ - addr &= 0x7FFF; - if (addr == REG_L) - { - return; // Shouldn't really happen (read only) - } - else if (addr == REG_M) - { - //accept interrupts - if (data & 1) - e68k_AcceptInterrupt(); - } - else - { - return g_AICA->WriteReg(addr, data, sz); - } -} - -//00000000~007FFFFF @DRAM_AREA* -//00800000~008027FF @CHANNEL_DATA -//00802800~00802FFF @COMMON_DATA -//00803000~00807FFF @DSP_DATA - -template u8 arm_ReadReg<1,u8>(u32 adr); -template u16 arm_ReadReg<2,u16>(u32 adr); -template u32 arm_ReadReg<4,u32>(u32 adr); - -template void arm_WriteReg<1>(u32 adr,u8 data); -template void arm_WriteReg<2>(u32 adr,u16 data); -template void arm_WriteReg<4>(u32 adr,u32 data); \ No newline at end of file +#include "hw/aica/aica_mmio.h" diff --git a/libswirl/hw/arm7/arm_mem.h b/libswirl/hw/arm7/arm_mem.h index c8abe8e67c..e69de29bb2 100644 --- a/libswirl/hw/arm7/arm_mem.h +++ b/libswirl/hw/arm7/arm_mem.h @@ -1,57 +0,0 @@ -#pragma once -#include "types.h" -#include "hw/aica/aica_mem.h" - -template -T arm_ReadReg(u32 addr); -template -void arm_WriteReg(u32 addr,T data); - -template -static inline T DYNACALL scpu_ReadMemArm(u32 addr) -{ - addr&=0x00FFFFFF; - if (addr<0x800000) - { - T rv=*(T*)&aica_ram[addr&(ARAM_MASK-(sz-1))]; - - if (unlikely(sz==4 && addr&3)) - { - u32 sf=(addr&3)*8; - return (rv>>sf) | (rv<<(32-sf)); - } - else - return rv; - } - else - { - return arm_ReadReg(addr); - } -} - -template -static 
inline void DYNACALL scpu_WriteMemArm(u32 addr,T data) -{ - addr&=0x00FFFFFF; - if (addr<0x800000) - { - *(T*)&aica_ram[addr&(ARAM_MASK-(sz-1))]=data; - } - else - { - arm_WriteReg(addr,data); - } -} - -#define arm_ReadMem8 scpu_ReadMemArm<1,u8> -#define arm_ReadMem16 scpu_ReadMemArm<2,u16> -#define arm_ReadMem32 scpu_ReadMemArm<4,u32> - -#define arm_WriteMem8 scpu_WriteMemArm<1,u8> -#define arm_WriteMem16 scpu_WriteMemArm<2,u16> -#define arm_WriteMem32 scpu_WriteMemArm<4,u32> - - -extern bool e68k_out; - -void update_armintc(); diff --git a/libswirl/hw/arm7/virt_arm.cpp b/libswirl/hw/arm7/virt_arm.cpp index ffd6a114b9..2c04dc076a 100644 --- a/libswirl/hw/arm7/virt_arm.cpp +++ b/libswirl/hw/arm7/virt_arm.cpp @@ -90,7 +90,7 @@ namespace VARM #include "arm-new.h" - verify(reg[15].I==0); + verify(reg[15].I==0 || reg[15].I == 4); verify(arm_ArmNextPC==0); return clockTicks; diff --git a/libswirl/hw/gdrom/gdromv3.cpp b/libswirl/hw/gdrom/gdromv3.cpp index 41c793d051..6891bacf75 100644 --- a/libswirl/hw/gdrom/gdromv3.cpp +++ b/libswirl/hw/gdrom/gdromv3.cpp @@ -1021,7 +1021,7 @@ struct GDRomV3_impl final : MMIODevice { } } - void gdrom_serialize(void** data, unsigned int* total_size) { + void serialize(void** data, unsigned int* total_size) { REICAST_S(sns_asc); REICAST_S(sns_ascq); @@ -1049,7 +1049,7 @@ struct GDRomV3_impl final : MMIODevice { REICAST_S(ByteCount); } - bool gdrom_unserialize(void** data, unsigned int* total_size) { + void unserialize(void** data, unsigned int* total_size) { REICAST_US(sns_asc); REICAST_US(sns_ascq); @@ -1075,8 +1075,6 @@ struct GDRomV3_impl final : MMIODevice { REICAST_US(SecNumber); REICAST_US(GDStatus); REICAST_US(ByteCount); - - return true; } int Update(int i, int c, int j) @@ -1227,27 +1225,13 @@ MMIODevice* Create_GDRomDevice(SystemBus* sb, ASIC* asic) { //disk changes etc void libCore_gdrom_disc_change() { - if (g_GDRomDrive) dynamic_cast(g_GDRomDrive)->gd_setdisc(); + if (sh4_cpu) { + auto gdd = sh4_cpu->GetA0H(A0H_GDROM); + if 
(gdd) gdd->gd_setdisc(); + } } void libCore_CDDA_Sector(s16* sector) { - dynamic_cast(g_GDRomDrive)->ReadCDDA(sector); -} -/* -u32 ReadMem_gdrom(u32 Addr, u32 sz) -{ - return dynamic_cast(g_GDRomDrive)->Read(Addr, sz); -} -void WriteMem_gdrom(u32 Addr, u32 data, u32 sz) -{ - dynamic_cast(g_GDRomDrive)->Write(Addr, data, sz); -} -*/ -void gdrom_serialize(void** data, unsigned int* total_size) { - dynamic_cast(g_GDRomDrive)->gdrom_serialize(data, total_size); -} - -bool gdrom_unserialize(void** data, unsigned int* total_size) { - return dynamic_cast(g_GDRomDrive)->gdrom_unserialize(data, total_size); + sh4_cpu->GetA0H(A0H_GDROM)->ReadCDDA(sector); } \ No newline at end of file diff --git a/libswirl/hw/holly/holly_intc.cpp b/libswirl/hw/holly/holly_intc.cpp index 59ad596712..34c27b0c8f 100644 --- a/libswirl/hw/holly/holly_intc.cpp +++ b/libswirl/hw/holly/holly_intc.cpp @@ -13,7 +13,7 @@ //TODO: MOVE THIS u32 SB_ISTNRM; -struct ASICDevice_impl : ASIC { +struct ASICDevice_impl final : ASIC { SystemBus* sb; ASICDevice_impl(SystemBus* sb) : sb(sb) { } diff --git a/libswirl/hw/holly/sb.cpp b/libswirl/hw/holly/sb.cpp index 81af6c87d2..d49838e8db 100644 --- a/libswirl/hw/holly/sb.cpp +++ b/libswirl/hw/holly/sb.cpp @@ -31,7 +31,7 @@ Array sb_regs(0x540); u32 SB_FFST_rc; u32 SB_FFST; -struct SystemBus_impl : SystemBus { +struct SystemBus_impl final : SystemBus { u32 sbio_read_noacc(u32 addr) { verify(false); diff --git a/libswirl/hw/maple/maple_cfg.h b/libswirl/hw/maple/maple_cfg.h index 75d4d116ed..4fba8e1958 100644 --- a/libswirl/hw/maple/maple_cfg.h +++ b/libswirl/hw/maple/maple_cfg.h @@ -58,12 +58,10 @@ struct IMapleConfigMap virtual ~IMapleConfigMap() {} }; -#if DC_PLATFORM == DC_PLATFORM_DREAMCAST void mcfg_CreateDevices(); -#else void mcfg_CreateNAOMIJamma(); void mcfg_CreateAtomisWaveControllers(); -#endif + void mcfg_DestroyDevices(); void mcfg_SerializeDevices(void **data, unsigned int *total_size); diff --git a/libswirl/hw/maple/maple_if.cpp 
b/libswirl/hw/maple/maple_if.cpp index 71d17eee0a..78a623d92c 100644 --- a/libswirl/hw/maple/maple_if.cpp +++ b/libswirl/hw/maple/maple_if.cpp @@ -59,7 +59,7 @@ bool IsOnSh4Ram(u32 addr) u32 dmacount=0; -struct MapleDevice : MMIODevice { +struct MapleDevice final : MMIODevice { void SB_MSHTCL_Write(u32 addr, u32 data) { @@ -247,61 +247,68 @@ struct MapleDevice : MMIODevice { { } -}; -MMIODevice* Create_MapleDevice(SystemBus* sb, ASIC* asic) { - return new MapleDevice(sb, asic); -} -void maple_vblank() -{ - if (SB_MDEN & 1) + void OnVblank() { - if (SB_MDTSEL & 1) + if (SB_MDEN & 1) { - if (maple_ddt_pending_reset) + if (SB_MDTSEL & 1) { - //printf("DDT vblank ; reset pending\n"); + if (maple_ddt_pending_reset) + { + //printf("DDT vblank ; reset pending\n"); + } + else + { + //printf("DDT vblank\n"); + SB_MDST = 1; + maple_DoDma(); + + SB_MDST = 0; + if ((SB_MSYS >> 12) & 1) + { + maple_ddt_pending_reset = true; + } + } } else { - //printf("DDT vblank\n"); - SB_MDST = 1; - reinterpret_cast(sh4_cpu->GetA0Handler(A0H_MAPLE))->maple_DoDma(); - - SB_MDST = 0; - if ((SB_MSYS >> 12) & 1) - { - maple_ddt_pending_reset = true; - } + maple_ddt_pending_reset = false; } } - else +#if DC_PLATFORM == DC_PLATFORM_DREAMCAST + handle_reconnect_periodical(); +#endif + } + + u64 reconnect_time; + void handle_reconnect_periodical() + { + if (reconnect_time != 0 && reconnect_time <= sh4_sched_now64()) { - maple_ddt_pending_reset = false; + reconnect_time = 0; + mcfg_CreateDevices(); } } -#if DC_PLATFORM == DC_PLATFORM_DREAMCAST - maple_handle_reconnect(); -#endif -} + void ReconnectDevices() + { + mcfg_DestroyDevices(); + reconnect_time = sh4_sched_now64() + SH4_MAIN_CLOCK / 10; + } +}; -#if DC_PLATFORM == DC_PLATFORM_DREAMCAST -static u64 reconnect_time; +MMIODevice* Create_MapleDevice(SystemBus* sb, ASIC* asic) { + return new MapleDevice(sb, asic); +} -void maple_ReconnectDevices() +void maple_vblank() { - mcfg_DestroyDevices(); - reconnect_time = sh4_sched_now64() + SH4_MAIN_CLOCK / 
10; + sh4_cpu->GetA0H(A0H_MAPLE)->OnVblank(); } -static void maple_handle_reconnect() +void maple_ReconnectDevices() { - if (reconnect_time != 0 && reconnect_time <= sh4_sched_now64()) - { - reconnect_time = 0; - mcfg_CreateDevices(); - } -} -#endif + sh4_cpu->GetA0H(A0H_MAPLE)->ReconnectDevices(); +} \ No newline at end of file diff --git a/libswirl/hw/mem/_vmem.cpp b/libswirl/hw/mem/_vmem.cpp index e6e0a7eec7..a0b02b85df 100644 --- a/libswirl/hw/mem/_vmem.cpp +++ b/libswirl/hw/mem/_vmem.cpp @@ -440,7 +440,7 @@ bool _vmem_bm_LockedWrite(u8* address) { return false; } -bool _vmem_reserve() { +bool _vmem_reserve(VLockedMemory* vram, VLockedMemory* aica_ram, u32 aram_size) { // TODO: Static assert? verify((sizeof(Sh4RCB)%PAGE_SIZE)==0); @@ -464,11 +464,11 @@ bool _vmem_reserve() { mem_b.size = RAM_SIZE; mem_b.data = (u8*)malloc_pages(RAM_SIZE); - vram.size = VRAM_SIZE; - vram.data = (u8*)malloc_pages(VRAM_SIZE); + vram->size = VRAM_SIZE; + vram->data = (u8*)malloc_pages(VRAM_SIZE); - aica_ram.size = ARAM_SIZE; - aica_ram.data = (u8*)malloc_pages(ARAM_SIZE); + aica_ram->size = aram_size; + aica_ram->data = (u8*)malloc_pages(aram_size); } else { printf("Info: nvmem is enabled, with addr space of size %s\n", vmemstatus == MemType4GB ? 
"4GB" : "512MB"); @@ -479,8 +479,8 @@ bool _vmem_reserve() { #define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE) const vmem_mapping mem_mappings[] = { {0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused - {0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica, wraps too - {0x20000000, 0x20000000+ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, + {0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, aram_size, false}, // Aica, wraps too + {0x20000000, 0x20000000+aram_size, MAP_ARAM_START_OFFSET, aram_size, true}, {0x01000000, 0x04000000, 0, 0, false}, // More unused {0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB) {0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused) @@ -493,19 +493,19 @@ bool _vmem_reserve() { vmem_platform_create_mappings(&mem_mappings[0], sizeof(mem_mappings) / sizeof(mem_mappings[0])); // Point buffers to actual data pointers - aica_ram.size = ARAM_SIZE; - aica_ram.data = &virt_ram_base[0x20000000]; // Points to the writtable AICA addrspace + aica_ram->size = aram_size; + aica_ram->data = &virt_ram_base[0x20000000]; // Points to the writtable AICA addrspace - vram.size = VRAM_SIZE; - vram.data = &virt_ram_base[0x04000000]; // Points to first vram mirror (writtable and lockable) + vram->size = VRAM_SIZE; + vram->data = &virt_ram_base[0x04000000]; // Points to first vram mirror (writtable and lockable) mem_b.size = RAM_SIZE; mem_b.data = &virt_ram_base[0x0C000000]; // Main memory, first mirror } // Clear out memory - aica_ram.Zero(); - vram.Zero(); + aica_ram->Zero(); + vram->Zero(); mem_b.Zero(); return true; @@ -514,13 +514,13 @@ bool _vmem_reserve() { #define freedefptr(x) \ if (x) { free(x); x = NULL; } -void _vmem_release() { +void _vmem_release(VLockedMemory* vram, VLockedMemory* aica_ram) { if (virt_ram_base) vmem_platform_destroy(); else { freedefptr(p_sh4rcb); - freedefptr(vram.data); - freedefptr(aica_ram.data); + 
freedefptr(vram->data); + freedefptr(aica_ram->data); freedefptr(mem_b.data); } } diff --git a/libswirl/hw/mem/_vmem.h b/libswirl/hw/mem/_vmem.h index 402260a1e2..fe9b568532 100644 --- a/libswirl/hw/mem/_vmem.h +++ b/libswirl/hw/mem/_vmem.h @@ -95,8 +95,8 @@ void DYNACALL _vmem_WriteMem32(u32 Address,u32 data); void DYNACALL _vmem_WriteMem64(u32 Address,u64 data); //should be called at start up to ensure it will succeed :) -bool _vmem_reserve(); -void _vmem_release(); +bool _vmem_reserve(VLockedMemory* vram, VLockedMemory* aica_ram, u32 aram_size); +void _vmem_release(VLockedMemory* vram, VLockedMemory* aica_ram); //dynarec helpers void _vmem_get_ptrs(u32 sz,bool write,void*** vmap,void*** func); diff --git a/libswirl/hw/pvr/Renderer_if.cpp b/libswirl/hw/pvr/Renderer_if.cpp index 3f8a0e80b9..1ecc1a2d35 100644 --- a/libswirl/hw/pvr/Renderer_if.cpp +++ b/libswirl/hw/pvr/Renderer_if.cpp @@ -111,12 +111,12 @@ bool pend_rend = false; static std::map backends; -static void rend_create_renderer() +static void rend_create_renderer(u8* vram) { if (backends.count(settings.pvr.backend)) { printf("RendIF: renderer: %s\n", settings.pvr.backend.c_str()); - renderer = backends[settings.pvr.backend].create(); + renderer = backends[settings.pvr.backend].create(vram); renderer->backendInfo = backends[settings.pvr.backend]; } else @@ -125,13 +125,13 @@ static void rend_create_renderer() auto main = (*vec.begin()); - renderer = main.create(); + renderer = main.create(vram); renderer->backendInfo = main; if ((++vec.begin()) != vec.end()) { auto fallback = (*(++vec.begin())); - fallback_renderer = fallback.create(); + fallback_renderer = fallback.create(vram); fallback_renderer->backendInfo = fallback; } @@ -143,10 +143,11 @@ static void rend_create_renderer() } } -void rend_init_renderer() +void rend_init_renderer(u8* vram) { if (renderer == NULL) - rend_create_renderer(); + rend_create_renderer(vram); + if (!renderer->Init()) { printf("RendIF: Renderer %s did not initialize. 
Falling back to %s.\n", @@ -183,13 +184,15 @@ void rend_term_renderer() } } -static bool rend_frame(TA_context* ctx, bool draw_osd) { +static bool rend_frame(u8* vram, TA_context* ctx, bool draw_osd) { +#if FIXME if (dump_frame_switch) { char name[32]; sprintf(name, "dcframe-%d", FrameCount); tactx_write_frame(name, _pvrrc, &vram[0]); dump_frame_switch = false; } +#endif if (renderer_changed) { @@ -198,7 +201,7 @@ static bool rend_frame(TA_context* ctx, bool draw_osd) { } if (renderer == nullptr) { - rend_init_renderer(); + rend_init_renderer(vram); } bool proc = renderer->Process(ctx); @@ -328,7 +331,7 @@ void rend_resize(int width, int height) { } -void rend_start_render() +void rend_start_render(u8* vram) { render_called = true; pend_rend = false; @@ -385,7 +388,7 @@ void rend_start_render() //tactx_Recycle(ctx); ctx = read_frame("frames/dcframe-SoA-intro-tr-autosort"); //printf("REP: %.2f ms\n",render_end_pending_cycles/200000.0); - FillBGP(ctx); + FillBGP(vram, ctx); ctx->rend.isRTT=is_rtt; @@ -422,7 +425,7 @@ void rend_start_render() verify(_pvrrc == ctx); - bool do_swp = rend_frame(_pvrrc, true); + bool do_swp = rend_frame(vram, _pvrrc, true); if (_pvrrc->rend.isRTT) re.Set(); diff --git a/libswirl/hw/pvr/Renderer_if.h b/libswirl/hw/pvr/Renderer_if.h index e5087e9442..f738abf8f7 100644 --- a/libswirl/hw/pvr/Renderer_if.h +++ b/libswirl/hw/pvr/Renderer_if.h @@ -1,16 +1,16 @@ #pragma once -#include "drkPvr.h" + #include "ta_ctx.h" extern u32 VertexCount; extern u32 FrameCount; -void rend_init_renderer(); +void rend_init_renderer(u8* vram); void rend_term_renderer(); void rend_vblank(); -void rend_start_render(); +void rend_start_render(u8* vram); void rend_end_render(); void rend_set_fb_scale(float x,float y); @@ -28,7 +28,7 @@ struct rendererbackend_t { string slug; string desc; int priority; - Renderer* (*create)(); + Renderer* (*create)(u8* vram); }; struct Renderer diff --git a/libswirl/hw/pvr/drkPvr.cpp b/libswirl/hw/pvr/drkPvr.cpp deleted file mode 
100644 index c3beb1c340..0000000000 --- a/libswirl/hw/pvr/drkPvr.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// drkPvr.cpp : Defines the entry point for the DLL application. -// - -/* - Plugin structure - Interface - SPG - TA - Renderer -*/ - -#include "drkPvr.h" - -#include "ta.h" -#include "spg.h" -#include "pvr_regs.h" -#include "pvr_mem.h" -#include "Renderer_if.h" - -struct PowerVR_impl : PowerVR { - - void Reset(bool Manual) - { - - } - - s32 Init() - { - rend_init_renderer(); - - return rv_ok; - } - - //called when exiting from sh4 thread , from the new thread context (for any thread specific de init) :P - void Term() - { - rend_term_renderer(); - } -}; - -PowerVR* PowerVR::Create() { - return new PowerVR_impl(); -} \ No newline at end of file diff --git a/libswirl/hw/pvr/drkPvr.h b/libswirl/hw/pvr/drkPvr.h deleted file mode 100644 index 4859097a59..0000000000 --- a/libswirl/hw/pvr/drkPvr.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include "config.h" - - - -//bleh stupid windoze header -#include "types.h" -#include -#include -#include - -#define fverify verify - -#include "helper_classes.h" - -extern int render_end_schid; \ No newline at end of file diff --git a/libswirl/hw/pvr/helper_classes.h b/libswirl/hw/pvr/helper_classes.h index 2e1f01d630..2c34c6fec7 100644 --- a/libswirl/hw/pvr/helper_classes.h +++ b/libswirl/hw/pvr/helper_classes.h @@ -1,4 +1,5 @@ #pragma once +#include "types.h" template struct List diff --git a/libswirl/hw/pvr/pvr_mem.cpp b/libswirl/hw/pvr/pvr_mem.cpp index 6cd0fc17a3..56354a043b 100644 --- a/libswirl/hw/pvr/pvr_mem.cpp +++ b/libswirl/hw/pvr/pvr_mem.cpp @@ -130,12 +130,12 @@ INLINE void YUV_Block384(u8* in, u8* out) YUV_Block8x8(inuv+36,iny+192,p_out+YUV_x_size*8*2+8*2); //(8,8) } -INLINE void YUV_ConvertMacroBlock(u8* datap) +INLINE void YUV_ConvertMacroBlock(u8* vram, u8* datap) { //do shit TA_YUV_TEX_CNT++; - YUV_Block384((u8*)datap,vram.data + YUV_dest); + YUV_Block384((u8*)datap, vram + YUV_dest); YUV_dest+=32; @@ -159,7 +159,7 
@@ INLINE void YUV_ConvertMacroBlock(u8* datap) } } -void YUV_data(u32* data , u32 count) +void YUV_data(u32* data , u32 count, u8* vram) { if (YUV_blockcount==0) { @@ -183,12 +183,12 @@ void YUV_data(u32* data , u32 count) if (YUV_index == 0) { // Avoid copy - YUV_ConvertMacroBlock((u8 *)data); //convert block + YUV_ConvertMacroBlock(vram, (u8 *)data); //convert block } else { memcpy(&YUV_tempdata[YUV_index >> 2], data, dr);//copy em - YUV_ConvertMacroBlock((u8 *)&YUV_tempdata[0]); //convert block + YUV_ConvertMacroBlock(vram, (u8 *)&YUV_tempdata[0]); //convert block YUV_index = 0; } data += dr >> 2; //count em @@ -234,11 +234,11 @@ u8 DYNACALL pvr_read_area1_8(SuperH4* sh4, u32 addr) u16 DYNACALL pvr_read_area1_16(SuperH4* sh4, u32 addr) { - return *(u16*)&vram[pvr_map32(addr)]; + return *(u16*)&sh4->vram[pvr_map32(addr)]; } u32 DYNACALL pvr_read_area1_32(SuperH4* sh4, u32 addr) { - return *(u32*)&vram[pvr_map32(addr)]; + return *(u32*)&sh4->vram[pvr_map32(addr)]; } //write @@ -255,7 +255,7 @@ void DYNACALL pvr_write_area1_16(SuperH4* sh4, u32 addr,u16 data) { fb_dirty = true; } - *(u16*)&vram[pvr_map32(addr)]=data; + *(u16*)&sh4->vram[pvr_map32(addr)]=data; } void DYNACALL pvr_write_area1_32(SuperH4* sh4, u32 addr,u32 data) { @@ -266,26 +266,26 @@ void DYNACALL pvr_write_area1_32(SuperH4* sh4, u32 addr,u32 data) { fb_dirty = true; } - *(u32*)&vram[pvr_map32(addr)] = data; + *(u32*)&sh4->vram[pvr_map32(addr)] = data; } -void TAWrite(u32 address,u32* data,u32 count) +void TAWrite(u32 address,u32* data,u32 count, u8* vram) { u32 address_w=address&0x1FFFFFF;//correct ? if (address_w<0x800000)//TA poly { - ta_vtx_data(data,count); + ta_vtx_data(data, count); } else if(address_w<0x1000000) //Yuv Converter { - YUV_data(data,count); + YUV_data(data, count, vram); } else //Vram Writef { //shouldn't really get here (?) 
-> works on dc :D need to handle lmmodes //printf("Vram TAWrite 0x%X , bkls %d\n",address,count); verify(SB_LMMODE0 == 0); - memcpy(&vram.data[address&VRAM_MASK],data,count*32); + memcpy(&vram[address & VRAM_MASK], data, count * 32); } } @@ -302,13 +302,15 @@ extern "C" void DYNACALL TAWriteSQ(u32 address,u8* sqb) u32 address_w=address&0x1FFFFFF;//correct ? u8* sq=&sqb[address&0x20]; + u8* vram = sqb + 512 + 0x04000000; + if (likely(address_w<0x800000))//TA poly { ta_vtx_data32(sq); } else if(likely(address_w<0x1000000)) //Yuv Converter { - YUV_data((u32*)sq,1); + YUV_data((u32*)sq, 1, vram); } else //Vram Writef { @@ -317,7 +319,6 @@ extern "C" void DYNACALL TAWriteSQ(u32 address,u8* sqb) if (SB_LMMODE0 == 0) { // 64b path - u8* vram=sqb+512+0x04000000; MemWrite32(&vram[address_w&(VRAM_MASK-0x1F)],sq); } else @@ -337,8 +338,7 @@ extern "C" void DYNACALL TAWriteSQ(u32 address,u8* sqb) //Reset -> Reset - Initialise to default values void pvr_Reset(bool Manual) { - if (!Manual) - vram.Zero(); + } #define VRAM_BANK_BIT 0x400000 @@ -362,11 +362,11 @@ u32 pvr_map32(u32 offset32) } -f32 vrf(u32 addr) +f32 vrf(u8* vram, u32 addr) { return *(f32*)&vram[pvr_map32(addr)]; } -u32 vri(u32 addr) +u32 vri(u8* vram, u32 addr) { return *(u32*)&vram[pvr_map32(addr)]; } diff --git a/libswirl/hw/pvr/pvr_mem.h b/libswirl/hw/pvr/pvr_mem.h index 5dc3ae3c7d..38a96b4469 100644 --- a/libswirl/hw/pvr/pvr_mem.h +++ b/libswirl/hw/pvr/pvr_mem.h @@ -5,11 +5,11 @@ u32 pvr_map32(u32 offset32); -f32 vrf(u32 addr); -u32 vri(u32 addr); +f32 vrf(u8* vram, u32 addr); +u32 vri(u8* vram, u32 addr); //vram 32-64b -extern VLockedMemory vram; + extern bool fb_dirty; @@ -34,7 +34,7 @@ void pvr_Term(); //Reset -> Reset - Initialise void pvr_Reset(bool Manual); -void TAWrite(u32 address,u32* data,u32 count); +void TAWrite(u32 address,u32* data,u32 count, u8* vram); extern "C" void DYNACALL TAWriteSQ(u32 address,u8* sqb); void YUV_init(); diff --git a/libswirl/hw/pvr/pvr_regs.h b/libswirl/hw/pvr/pvr_regs.h 
index df45f9ab63..f757c5a4f3 100644 --- a/libswirl/hw/pvr/pvr_regs.h +++ b/libswirl/hw/pvr/pvr_regs.h @@ -1,5 +1,5 @@ #pragma once -#include "drkPvr.h" +#include "types.h" #define pvr_RegSize (0x8000) #define pvr_RegMask (pvr_RegSize-1) diff --git a/libswirl/hw/pvr/pvr_sb_regs.cpp b/libswirl/hw/pvr/pvr_sb_regs.cpp index 8be7a58d92..cc0910372b 100644 --- a/libswirl/hw/pvr/pvr_sb_regs.cpp +++ b/libswirl/hw/pvr/pvr_sb_regs.cpp @@ -31,7 +31,7 @@ struct PVRDevice : MMIODevice { if (1 & data) { SB_C2DST = 1; - DMAC_Ch2St(); + DMAC_Ch2St(vram); } } //PVR-DMA @@ -146,8 +146,9 @@ struct PVRDevice : MMIODevice { SystemBus* sb; ASIC* asic; SPG* spg; + u8* vram; - PVRDevice(SystemBus* sb, ASIC* asic, SPG* spg) : sb(sb), asic(asic), spg(spg) { } + PVRDevice(SystemBus* sb, ASIC* asic, SPG* spg, u8* vram) : sb(sb), asic(asic), spg(spg), vram(vram) { } u32 Read(u32 addr, u32 sz) { @@ -171,7 +172,7 @@ struct PVRDevice : MMIODevice { if (addr == STARTRENDER_addr) { //start render - rend_start_render(); + rend_start_render(vram); return; } @@ -293,6 +294,6 @@ struct PVRDevice : MMIODevice { }; -MMIODevice* Create_PVRDevice(SystemBus* sb, ASIC* asic, SPG* spg) { - return new PVRDevice(sb, asic, spg); +MMIODevice* Create_PVRDevice(SystemBus* sb, ASIC* asic, SPG* spg, u8* vram) { + return new PVRDevice(sb, asic, spg, vram); } \ No newline at end of file diff --git a/libswirl/hw/pvr/spg.h b/libswirl/hw/pvr/spg.h index 95300dbc68..799fe8d489 100644 --- a/libswirl/hw/pvr/spg.h +++ b/libswirl/hw/pvr/spg.h @@ -1,5 +1,5 @@ #pragma once -#include "drkPvr.h" + #include "hw/sh4/sh4_mmio.h" struct ASIC; @@ -13,21 +13,4 @@ struct SPG : MMIODevice { static SPG* Create(ASIC* asic); }; -#if 0 -bool spg_Init(); -void spg_Term(); -void spg_Reset(bool Manual); - -//#define Frame_Cycles (DCclock/60) - -//need to replace 511 with correct value -//#define Line_Cycles (Frame_Cycles/511) - -void spgUpdatePvr(u32 cycles); -bool spg_Init(); -void spg_Term(); -void spg_Reset(bool Manual); -void CalculateSync(); 
-#endif - void read_lightgun_position(int x, int y); \ No newline at end of file diff --git a/libswirl/hw/pvr/ta.h b/libswirl/hw/pvr/ta.h index eb8505bf68..16a9dbdda5 100644 --- a/libswirl/hw/pvr/ta.h +++ b/libswirl/hw/pvr/ta.h @@ -1,5 +1,5 @@ #pragma once -#include "drkPvr.h" + #include "hw/holly/holly_intc.h" #include "hw/sh4/sh4_if.h" #include "oslib/oslib.h" @@ -19,6 +19,6 @@ void ta_vtx_SoftReset(); void DYNACALL ta_vtx_data32(void* data); void ta_vtx_data(u32* data, u32 size); -bool ta_parse_vdrc(TA_context* ctx); +bool ta_parse_vdrc(u8* vram, TA_context* ctx); #define TRIG_SORT 1 diff --git a/libswirl/hw/pvr/ta_ctx.cpp b/libswirl/hw/pvr/ta_ctx.cpp index dea5c65e35..953bc5dea7 100644 --- a/libswirl/hw/pvr/ta_ctx.cpp +++ b/libswirl/hw/pvr/ta_ctx.cpp @@ -379,7 +379,7 @@ void tactx_write_frame(const char* file, TA_context* ctx, u8* vram, u8* vram_ref fclose(fw); } -TA_context* tactx_read_frame(const char* file, u8* vram_ref) { +TA_context* tactx_read_frame(const char* file, u8* vram, u8* vram_ref) { FILE* fw = fopen(file, "rb"); if (fw == NULL) @@ -420,8 +420,6 @@ TA_context* tactx_read_frame(const char* file, u8* vram_ref) { fread(&t, 1, sizeof(t), fw); verify(t == VRAM_SIZE); - vram.UnLockRegion(0, VRAM_SIZE); - uLongf compressed_size; fread(&compressed_size, 1, sizeof(compressed_size), fw); @@ -429,7 +427,7 @@ TA_context* tactx_read_frame(const char* file, u8* vram_ref) { u8* gz_stream = (u8*)malloc(compressed_size); fread(gz_stream, 1, compressed_size, fw); uLongf tl = t; - verify(uncompress(vram.data, &tl, gz_stream, compressed_size) == Z_OK); + verify(uncompress(vram, &tl, gz_stream, compressed_size) == Z_OK); free(gz_stream); fread(&t, 1, sizeof(t), fw); diff --git a/libswirl/hw/pvr/ta_ctx.h b/libswirl/hw/pvr/ta_ctx.h index 0386214908..2f3e8079f9 100644 --- a/libswirl/hw/pvr/ta_ctx.h +++ b/libswirl/hw/pvr/ta_ctx.h @@ -1,6 +1,7 @@ #pragma once #include "ta.h" #include "pvr_regs.h" +#include "helper_classes.h" #include "oslib/threading.h" // helper for 32 
byte aligned memory allocation @@ -272,8 +273,8 @@ bool TryDecodeTARC(); void VDecEnd(); //must be moved to proper header -void FillBGP(TA_context* ctx); -bool UsingAutoSort(int pass_number); +void FillBGP(u8* vram, TA_context* ctx); +bool UsingAutoSort(u8* vram, int pass_number); bool rend_framePending(); diff --git a/libswirl/hw/pvr/ta_vtx.cpp b/libswirl/hw/pvr/ta_vtx.cpp index f25ea0f4dd..4de06f1e30 100644 --- a/libswirl/hw/pvr/ta_vtx.cpp +++ b/libswirl/hw/pvr/ta_vtx.cpp @@ -1507,7 +1507,7 @@ data+=poly_size; } }; -static bool ClearZBeforePass(int pass_number); +static bool ClearZBeforePass(u8* vram, int pass_number); FifoSplitter<0> TAFifo0; @@ -1516,7 +1516,7 @@ int ta_parse_cnt = 0; /* Also: gotta stage textures here */ -bool ta_parse_vdrc(TA_context* ctx) +bool ta_parse_vdrc(u8* vram, TA_context* ctx) { bool rv=false; verify( vd_ctx == 0); @@ -1550,8 +1550,8 @@ bool ta_parse_vdrc(TA_context* ctx) render_pass->pt_count = vd_rc.global_param_pt.used(); render_pass->tr_count = vd_rc.global_param_tr.used(); render_pass->mvo_tr_count = vd_rc.global_param_mvo_tr.used(); - render_pass->autosort = UsingAutoSort(pass); - render_pass->z_clear = ClearZBeforePass(pass); + render_pass->autosort = UsingAutoSort(vram, pass); + render_pass->z_clear = ClearZBeforePass(vram, pass); } bool empty_context = true; @@ -1583,7 +1583,7 @@ bool ta_parse_vdrc(TA_context* ctx) //decode a vertex in the native pvr format //used for bg poly -void decode_pvr_vertex(u32 base,u32 ptr,Vertex* cv) +void decode_pvr_vertex(u8* vram, u32 base,u32 ptr,Vertex* cv) { //ISP //TSP @@ -1592,9 +1592,9 @@ void decode_pvr_vertex(u32 base,u32 ptr,Vertex* cv) TSP tsp; TCW tcw; - isp.full=vri(base); - tsp.full=vri(base+4); - tcw.full=vri(base+8); + isp.full=vri(vram, base); + tsp.full=vri(vram, base+4); + tcw.full=vri(vram, base+8); //XYZ //UV @@ -1602,33 +1602,33 @@ void decode_pvr_vertex(u32 base,u32 ptr,Vertex* cv) //Offset Col //XYZ are _allways_ there :) - cv->x=vrf(ptr);ptr+=4; - cv->y=vrf(ptr);ptr+=4; 
- cv->z=vrf(ptr);ptr+=4; + cv->x=vrf(vram, ptr);ptr+=4; + cv->y=vrf(vram, ptr);ptr+=4; + cv->z=vrf(vram, ptr);ptr+=4; if (isp.Texture) { //Do texture , if any if (isp.UV_16b) { - u32 uv=vri(ptr); + u32 uv=vri(vram, ptr); cv->u = f16((u16)uv); cv->v = f16((u16)(uv >> 16)); ptr+=4; } else { - cv->u=vrf(ptr);ptr+=4; - cv->v=vrf(ptr);ptr+=4; + cv->u=vrf(vram, ptr);ptr+=4; + cv->v=vrf(vram, ptr);ptr+=4; } } //Color - u32 col=vri(ptr);ptr+=4; + u32 col=vri(vram, ptr);ptr+=4; vert_packed_color_(cv->col,col); if (isp.Offset) { //Intensity color (can be missing too ;p) - u32 col=vri(ptr);ptr+=4; + u32 col=vri(vram, ptr);ptr+=4; vert_packed_color_(cv->spc,col); } } @@ -1663,7 +1663,7 @@ void vtxdec_init() static OnLoad ol_vtxdec(&vtxdec_init); -void FillBGP(TA_context* ctx) +void FillBGP(u8* vram, TA_context* ctx) { //Render pre-code @@ -1698,9 +1698,9 @@ void FillBGP(TA_context* ctx) bgpp->texid = -1; - bgpp->isp.full=vri(strip_base); - bgpp->tsp.full=vri(strip_base+4); - bgpp->tcw.full=vri(strip_base+8); + bgpp->isp.full=vri(vram, strip_base); + bgpp->tsp.full=vri(vram, strip_base+4); + bgpp->tcw.full=vri(vram, strip_base+8); bgpp->tcw1.full = -1; bgpp->tsp1.full = -1; bgpp->texid1 = -1; @@ -1720,7 +1720,7 @@ void FillBGP(TA_context* ctx) float scale_x= (SCALER_CTL.hscale) ? 
2.f:1.f; //if AA hack the hacked pos value hacks for (int i=0;i<3;i++) { - decode_pvr_vertex(strip_base,vertex_ptr,&cv[i]); + decode_pvr_vertex(vram, strip_base,vertex_ptr,&cv[i]); vertex_ptr+=strip_vs; } @@ -1745,12 +1745,12 @@ void FillBGP(TA_context* ctx) cv[3].y=480+2000; } -static RegionArrayTile getRegionTile(int pass_number) +static RegionArrayTile getRegionTile(u8* vram, int pass_number) { u32 addr = REGION_BASE; bool empty_first_region = true; for (int i = 0; i < 5; i++) - if ((vri(addr + (i + 1) * 4) & 0x80000000) == 0) + if ((vri(vram, addr + (i + 1) * 4) & 0x80000000) == 0) { empty_first_region = false; break; @@ -1759,12 +1759,12 @@ static RegionArrayTile getRegionTile(int pass_number) addr += 6 * 4; RegionArrayTile tile; - tile.full = vri(addr + pass_number * 6 * 4); + tile.full = vri(vram, addr + pass_number * 6 * 4); return tile; } -bool UsingAutoSort(int pass_number) +bool UsingAutoSort(u8* vram, int pass_number) { if (((FPU_PARAM_CFG >> 21) & 1) == 0) // Type 1 region header type @@ -1772,15 +1772,15 @@ bool UsingAutoSort(int pass_number) else { // Type 2 - RegionArrayTile tile = getRegionTile(pass_number); + RegionArrayTile tile = getRegionTile(vram, pass_number); return !tile.PreSort; } } -static bool ClearZBeforePass(int pass_number) +static bool ClearZBeforePass(u8* vram, int pass_number) { - RegionArrayTile tile = getRegionTile(pass_number); + RegionArrayTile tile = getRegionTile(vram, pass_number); return !tile.NoZClear; } diff --git a/libswirl/hw/sh4/dyna/driver.cpp b/libswirl/hw/sh4/dyna/driver.cpp index 5d62e1ea9e..3c1b79a525 100644 --- a/libswirl/hw/sh4/dyna/driver.cpp +++ b/libswirl/hw/sh4/dyna/driver.cpp @@ -437,6 +437,8 @@ void* DYNACALL rdv_LinkBlock(u8* code,u32 dpc) return (void*)rv; } struct recSH4 : SuperH4Backend { + ~recSH4() { Term(); } + void ClearCache() { LastAddr = LastAddr_min; diff --git a/libswirl/hw/sh4/interpr/sh4_interpreter.cpp b/libswirl/hw/sh4/interpr/sh4_interpreter.cpp index 183b2a6943..7eb6716ffe 100644 --- 
a/libswirl/hw/sh4/interpr/sh4_interpreter.cpp +++ b/libswirl/hw/sh4/interpr/sh4_interpreter.cpp @@ -21,6 +21,7 @@ #include "profiler/profiler.h" #include "../dyna/blockmanager.h" #include "../sh4_sched.h" +#include "hw/arm7/SoundCPU.h" #include "libswirl.h" @@ -41,8 +42,7 @@ static s32 l; #define AICA_SAMPLE_CYCLES (SH4_MAIN_CLOCK/(44100/AICA_SAMPLE_GCM)*32) int aica_schid = -1; -int rtc_schid = -1; - +int ds_schid = -1; //14336 Cycles const int AICA_TICK = 145124; @@ -106,8 +106,8 @@ int AicaUpdate(void* psh4, int tag, int c, int j) //if (aica_sample_cycles>=AICA_SAMPLE_CYCLES) { - g_SoundCPU->Update(512 * 32); - g_AICA->Update(1 * 32); + sh4_cpu->GetA0H<SoundCPU>(A0H_SCPU)->Update(512 * 32); + sh4_cpu->GetA0H<AICA>(A0H_AICA)->Update(1 * 32); //aica_sample_cycles-=AICA_SAMPLE_CYCLES; } @@ -117,8 +117,6 @@ int AicaUpdate(void* psh4, int tag, int c, int j) int DreamcastSecond(void* psh4, int tag, int c, int j) { - RealTimeClock++; - #if 1 //HOST_OS==OS_WINDOWS prof_periodical(); #endif @@ -155,7 +153,7 @@ int UpdateSystem_INTC() } struct SH4IInterpreter : SuperH4Backend { - + ~SH4IInterpreter() { Term(); } void Loop() { l = SH4_TIMESLICE; @@ -211,7 +209,7 @@ MMIODevice* SuperH4_impl::GetA0Handler(Area0Hanlders slot) { } bool SuperH4_impl::setBackend(SuperH4Backends backend) { - if (sh4_backend) { sh4_backend->Term(); delete sh4_backend; sh4_backend = nullptr; } + if (sh4_backend) { delete sh4_backend; sh4_backend = nullptr; } switch (backend) { @@ -326,8 +324,8 @@ bool SuperH4_impl::Init() aica_schid = sh4_sched_register(sh4_cpu, 0, &AicaUpdate); sh4_sched_request(aica_schid, AICA_TICK); - rtc_schid = sh4_sched_register(sh4_cpu, 0, &DreamcastSecond); - sh4_sched_request(rtc_schid, SH4_MAIN_CLOCK); + ds_schid = sh4_sched_register(sh4_cpu, 0, &DreamcastSecond); + sh4_sched_request(ds_schid, SH4_MAIN_CLOCK); } memset(&p_sh4rcb->cntx, 0, sizeof(p_sh4rcb->cntx)); @@ -339,6 +337,10 @@ bool SuperH4_impl::Init() void SuperH4_impl::Term() { Stop(); + + sh4_sched_cleanup(); + if 
(sh4_backend) { delete sh4_backend; sh4_backend = nullptr; } + printf("Sh4 Term\n"); } diff --git a/libswirl/hw/sh4/modules/dmac.cpp b/libswirl/hw/sh4/modules/dmac.cpp index 127bee3836..02d543fd25 100644 --- a/libswirl/hw/sh4/modules/dmac.cpp +++ b/libswirl/hw/sh4/modules/dmac.cpp @@ -24,7 +24,7 @@ DMAC_DMAOR_type DMAC_DMAOR; */ -void DMAC_Ch2St() +void DMAC_Ch2St(u8* vram) { u32 chcr = DMAC_CHCR(2).full; u32 dmaor = DMAC_DMAOR.full; @@ -61,7 +61,7 @@ void DMAC_Ch2St() { u32 *sys_buf=(u32 *)GetMemPtr(src,len);//(&mem_b[src&RAM_MASK]); u32 new_len=RAM_SIZE-p_addr; - TAWrite(dst,sys_buf,(new_len/32)); + TAWrite(dst,sys_buf,(new_len/32), vram); len-=new_len; src+=new_len; //dst+=new_len; @@ -69,7 +69,7 @@ void DMAC_Ch2St() else { u32 *sys_buf=(u32 *)GetMemPtr(src,len);//(&mem_b[src&RAM_MASK]); - TAWrite(dst,sys_buf,(len/32)); + TAWrite(dst,sys_buf,(len/32), vram); src+=len; break; } diff --git a/libswirl/hw/sh4/modules/dmac.h b/libswirl/hw/sh4/modules/dmac.h index 4131092f61..f615293f48 100644 --- a/libswirl/hw/sh4/modules/dmac.h +++ b/libswirl/hw/sh4/modules/dmac.h @@ -5,7 +5,7 @@ // void dmac_ddt_ch0_ddt(u32 src,u32 dst,u32 count); void dmac_ddt_ch2_direct(u32 dst,u32 count); -void DMAC_Ch2St(); +void DMAC_Ch2St(u8* vram); //Init/Res/Term diff --git a/libswirl/hw/sh4/sh4_if.h b/libswirl/hw/sh4/sh4_if.h index b7a8398815..6bc7f26377 100644 --- a/libswirl/hw/sh4/sh4_if.h +++ b/libswirl/hw/sh4/sh4_if.h @@ -250,16 +250,24 @@ enum Area0Hanlders { A0H_MAPLE, A0H_ASIC, A0H_SPG, + A0H_SCPU, + A0H_DSP, A0H_MAX }; struct SuperH4 { - static SuperH4* Create(); + VLockedMemory vram; + VLockedMemory aica_ram; virtual void SetA0Handler(Area0Hanlders slot, MMIODevice* dev) = 0; virtual MMIODevice* GetA0Handler(Area0Hanlders slot) = 0; + template<typename T> + T* GetA0H(Area0Hanlders slot) { + return dynamic_cast<T*>(GetA0Handler(slot)); + } + virtual bool setBackend(SuperH4Backends backend) = 0; virtual bool Init() = 0; @@ -276,9 +284,9 @@ struct SuperH4 { virtual void ResetCache() = 0; - //virtual 
void RaiseExeption(u32 ExeptionCode, u32 VectorAddress) = 0; - virtual ~SuperH4() { } + + static SuperH4* Create(); }; struct Sh4Context @@ -388,11 +396,10 @@ s32 rcb_poffs(T* ptr) struct SuperH4Backend { virtual bool Init() = 0; - virtual void Term() = 0; virtual void Loop() = 0; virtual void ClearCache() = 0; - virtual ~SuperH4Backend() { } + virtual ~SuperH4Backend() { } }; //Get an interface to sh4 interpreter diff --git a/libswirl/hw/sh4/sh4_mem.cpp b/libswirl/hw/sh4/sh4_mem.cpp index 8ca00ddeda..0c107fe379 100644 --- a/libswirl/hw/sh4/sh4_mem.cpp +++ b/libswirl/hw/sh4/sh4_mem.cpp @@ -29,13 +29,13 @@ void map_area1_init() pvr_write_area1_8,pvr_write_area1_16,pvr_write_area1_32); } -void map_area1(u32 base) +void map_area1(SuperH4* sh4, u32 base) { //map vram //Lower 32 mb map //64b interface - _vmem_map_block(vram.data,0x04 | base,0x04 | base,VRAM_SIZE-1); + _vmem_map_block(sh4->vram.data,0x04 | base,0x04 | base,VRAM_SIZE-1); //32b interface _vmem_map_handler(area1_32b,0x05 | base,0x05 | base); @@ -50,7 +50,7 @@ void map_area2_init() //nothing to map :p } -void map_area2(u32 base) +void map_area2(SuperH4* sh4, u32 base) { //nothing to map :p } @@ -61,7 +61,7 @@ void map_area3_init() { } -void map_area3(u32 base) +void map_area3(SuperH4* sh4, u32 base) { //32x2 or 16x4 _vmem_map_block_mirror(mem_b.data,0x0C | base,0x0F | base,RAM_SIZE); @@ -73,7 +73,7 @@ void map_area4_init() } -void map_area4(u32 base) +void map_area4(SuperH4* sh4, u32 base) { //TODO : map later @@ -103,7 +103,7 @@ void map_area5_init() area5_handler = _vmem_register_handler_Template(ReadMem_extdev_T,WriteMem_extdev_T); } -void map_area5(u32 base) +void map_area5(SuperH4* sh4, u32 base) { //map whole region to plugin handler :) _vmem_map_handler(area5_handler,base|0x14,base|0x17); @@ -114,7 +114,7 @@ void map_area6_init() { //nothing to map :p } -void map_area6(u32 base) +void map_area6(SuperH4* sh4, u32 base) { //nothing to map :p } @@ -165,18 +165,18 @@ void mem_map_default(SuperH4_impl* 
sh4) //some areas can be customised :) for (int i=0x0;i<0xE;i+=0x2) { - map_area0(i<<4); //Bios,Flahsrom,i/f regs,Ext. Device,Sound Ram - map_area1(i<<4); //VRAM - map_area2(i<<4); //Unassigned - map_area3(i<<4); //RAM - map_area4(i<<4); //TA - map_area5(i<<4); //Ext. Device - map_area6(i<<4); //Unassigned - map_area7(i<<4); //Sh4 Regs + map_area0(sh4, i<<4); //Bios,Flahsrom,i/f regs,Ext. Device,Sound Ram + map_area1(sh4, i<<4); //VRAM + map_area2(sh4, i<<4); //Unassigned + map_area3(sh4, i<<4); //RAM + map_area4(sh4, i<<4); //TA + map_area5(sh4, i<<4); //Ext. Device + map_area6(sh4, i<<4); //Unassigned + map_area7(sh4, i<<4); //Sh4 Regs } //map p4 region :) - map_p4(); + map_p4(sh4); } void mem_Init(SuperH4_impl* sh4) { diff --git a/libswirl/hw/sh4/sh4_mem_area0.cpp b/libswirl/hw/sh4/sh4_mem_area0.cpp index e1d50ef2f5..7c0731c0e8 100644 --- a/libswirl/hw/sh4/sh4_mem_area0.cpp +++ b/libswirl/hw/sh4/sh4_mem_area0.cpp @@ -84,7 +84,8 @@ bool LoadRomFiles(const string& root) syscfg.mono = 0; syscfg.autostart = 1; } - u32 time = GetRTC_now(); + + u32 time = libAICA_GetRTC_now(); syscfg.time_lo = time & 0xffff; syscfg.time_hi = time >> 16; if (settings.dreamcast.language <= 5) @@ -192,15 +193,6 @@ struct ExtDevice : MMIODevice { } }; -struct RTCDevice : MMIODevice { - u32 Read(u32 addr, u32 sz) { - return ReadMem_aica_rtc(addr, sz); - } - void Write(u32 addr, u32 data, u32 sz) { - WriteMem_aica_rtc(addr, data, sz); - } -}; - MMIODevice* Create_BiosDevice() { return new BiosDevice(); @@ -214,11 +206,6 @@ MMIODevice* Create_ExtDevice() { return new ExtDevice(); } -MMIODevice* Create_RTCDevice() { - return new RTCDevice(); -} - - SuperH4* SuperH4::Create() { @@ -309,7 +296,7 @@ T DYNACALL ReadMem_area0(SuperH4* psh4, u32 addr) //map 0x0080 to 0x00FF else if ((base >=0x0080) && (base <=0x00FF) /*&& (addr>= 0x00800000) && (addr<=0x00FFFFFF)*/) // :AICA- Wave Memory { - ReadMemArrRet(aica_ram.data,addr&ARAM_MASK,sz); + ReadMemArrRet(sh4->aica_ram.data, addr & 
(sh4->aica_ram.size - 1), sz); } //map 0x0100 to 0x01FF else if ((base >=0x0100) && (base <=0x01FF) /*&& (addr>= 0x01000000) && (addr<= 0x01FFFFFF)*/) // :Ext. Device @@ -383,7 +370,7 @@ void DYNACALL WriteMem_area0(SuperH4* psh4, u32 addr,T data) //map 0x0080 to 0x00FF else if ((base >=0x0080) && (base <=0x00FF) /*&& (addr>= 0x00800000) && (addr<=0x00FFFFFF)*/) // AICA- Wave Memory { - WriteMemArrRet(aica_ram.data,addr&ARAM_MASK,data,sz); + WriteMemArrRet(sh4->aica_ram.data, addr & (sh4->aica_ram.size - 1), data, sz); return; } //map 0x0100 to 0x01FF @@ -426,7 +413,7 @@ void map_area0_init() area0_handler = _vmem_register_handler_Template(ReadMem_area0,WriteMem_area0); } -void map_area0(u32 base) +void map_area0(SuperH4* sh4, u32 base) { verify(base<0xE0); diff --git a/libswirl/hw/sh4/sh4_mem_area0.h b/libswirl/hw/sh4/sh4_mem_area0.h index 67db5a0ce6..a9eb056629 100644 --- a/libswirl/hw/sh4/sh4_mem_area0.h +++ b/libswirl/hw/sh4/sh4_mem_area0.h @@ -4,7 +4,7 @@ #include "SuperH4_impl.h" void map_area0_init(); -void map_area0(u32 base); +void map_area0(SuperH4* sh4, u32 base); //Init/Res/Term bool sh4_area0_Init(SuperH4_impl* sh4); diff --git a/libswirl/hw/sh4/sh4_mmio.h b/libswirl/hw/sh4/sh4_mmio.h index 5dd5991ef0..abe024cd29 100644 --- a/libswirl/hw/sh4/sh4_mmio.h +++ b/libswirl/hw/sh4/sh4_mmio.h @@ -11,5 +11,8 @@ struct MMIODevice virtual u32 Read(u32 addr, u32 sz) { die("not implemented"); return 0; }; virtual void Write(u32 addr, u32 data, u32 sz) { die("not implemented"); }; - virtual ~MMIODevice() { } + virtual void serialize(void** data, unsigned int* total_size) { } + virtual void unserialize(void** data, unsigned int* total_size) { } + + virtual ~MMIODevice() { Term(); } }; diff --git a/libswirl/hw/sh4/sh4_mmr.cpp b/libswirl/hw/sh4/sh4_mmr.cpp index 4c54b8066b..17b2b58706 100644 --- a/libswirl/hw/sh4/sh4_mmr.cpp +++ b/libswirl/hw/sh4/sh4_mmr.cpp @@ -921,7 +921,7 @@ void map_area7_init() area7_orc_handler= 
_vmem_register_handler_Template(ReadMem_area7_OCR_T,WriteMem_area7_OCR_T); } -void map_area7(u32 base) +void map_area7(SuperH4* sh4, u32 base) { //OCR @ //((addr>=0x7C000000) && (addr<=0x7FFFFFFF)) @@ -934,7 +934,7 @@ void map_area7(u32 base) } //P4 -void map_p4() +void map_p4(SuperH4* sh4) { //P4 Region : _vmem_handler p4_handler = _vmem_register_handler_Template(ReadMem_P4,WriteMem_P4); @@ -950,5 +950,5 @@ void map_p4() _vmem_map_block(sq_both,0xE2,0xE2,63); _vmem_map_block(sq_both,0xE3,0xE3,63); - map_area7(0xE0); + map_area7(sh4, 0xE0); } diff --git a/libswirl/hw/sh4/sh4_mmr.h b/libswirl/hw/sh4/sh4_mmr.h index 5f10d21a9f..475b73bb49 100644 --- a/libswirl/hw/sh4/sh4_mmr.h +++ b/libswirl/hw/sh4/sh4_mmr.h @@ -5,8 +5,8 @@ //For mem mapping void map_area7_init(); -void map_area7(u32 base); -void map_p4(); +void map_area7(SuperH4* sh4, u32 base); +void map_p4(SuperH4* sh4); #define OnChipRAM_SIZE (0x2000) #define OnChipRAM_MASK (OnChipRAM_SIZE-1) diff --git a/libswirl/hw/sh4/sh4_sched.cpp b/libswirl/hw/sh4/sh4_sched.cpp index 78775f9b8d..e476ccffb7 100644 --- a/libswirl/hw/sh4/sh4_sched.cpp +++ b/libswirl/hw/sh4/sh4_sched.cpp @@ -168,3 +168,7 @@ void sh4_sched_tick(int cycles) sh4_sched_ffts(); } } + +void sh4_sched_cleanup() { + sch_list.clear(); +} \ No newline at end of file diff --git a/libswirl/hw/sh4/sh4_sched.h b/libswirl/hw/sh4/sh4_sched.h index aa30e5eb86..8883a5104c 100644 --- a/libswirl/hw/sh4/sh4_sched.h +++ b/libswirl/hw/sh4/sh4_sched.h @@ -49,6 +49,8 @@ void sh4_sched_tick(int cycles); void sh4_sched_ffts(); +void sh4_sched_cleanup(); + extern u32 sh4_sched_intr; struct sched_list diff --git a/libswirl/jit/backend/arm32/ngen_arm.S b/libswirl/jit/backend/arm32/ngen_arm.S index 55fbc8fadf..3abb9bd86f 100644 --- a/libswirl/jit/backend/arm32/ngen_arm.S +++ b/libswirl/jit/backend/arm32/ngen_arm.S @@ -48,8 +48,11 @@ BCC CSYM(_Z13ta_vtx_data32Pv) @TA write? CSYM(TAWriteSQ_yuv): CMP R3, #0x1000000 @Yuv write ? 
BCS CSYM(TAWriteSQ_vram) +@ calculate vram ptr from sq: R2 = R1 + 512 + 0x04000000 +ADD R2, R1, #512 +ADD R2, R2, #0x04000000 MOV R1, #1 -B CSYM(_Z8YUV_dataPjj) +B CSYM(_Z8YUV_dataPjjPh) CSYM(TAWriteSQ_vram): @vram write .. #ifdef TARGET_IPHONE @@ -202,6 +205,8 @@ bx lr end_ngen_mainloop: @@@@@@@@@@ ngen_mainloop @@@@@@@@@@ +@ to be moved to vixl codegen +#if 0 #if FEAT_AREC == DYNAREC_JIT .global CSYM(arm_compilecode) HIDDEN(arm_compilecode) @@ -243,9 +248,10 @@ HIDDEN(arm_dispatch) CSYM(arm_dispatch): ldrd r0,r1,[r8,#184] @load: Next PC, interrupt -#if ARAM_SIZE == 2*1024*1024 + @ TODO: FIX THIS TO NOT BE STATIC / CODEGEN on INIT +#if INTERNAL_ARAM_SIZE == 2*1024*1024 ubfx r2,r0,#2,#19 @ assuming 2 MB address space max (21 bits) -#elif ARAM_SIZE == 8*1024*1024 +#elif INTERNAL_ARAM_SIZE == 8*1024*1024 ubfx r2,r0,#2,#21 @ assuming 8 MB address space max (23 bits) #else #error Unsupported AICA RAM size @@ -269,6 +275,8 @@ CSYM(arm_exit): pop {r4,r5,r8,r9,pc} #endif +#endif + @@@@@@ @matrix mul #ifndef _ANDROID diff --git a/libswirl/jit/emitter/arm32/E_DataOp.h b/libswirl/jit/emitter/arm32/E_DataOp.h index c9d8dafcb9..44036a1e48 100644 --- a/libswirl/jit/emitter/arm32/E_DataOp.h +++ b/libswirl/jit/emitter/arm32/E_DataOp.h @@ -146,36 +146,6 @@ ADD.SP.REG 0x008D0000 EAPI MVN DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_MVN); EMIT_I; } #endif - - - static u32 ARMImmid8r4_enc(u32 imm32) - { - for (int i=0;i<=30;i+=2) - { - u32 immv=(imm32<>(32-i)); - if (i == 0) - immv = imm32; - if (immv<256) - { - return ((i/2)<<8) | immv; - } - } - - return -1; - } - - static u32 ARMImmid8r4(u32 imm8r4) - { - u32 rv = ARMImmid8r4_enc(imm8r4); - - verify(rv!=-1); - return rv; - } - - static bool is_i8r4(u32 i32) { return ARMImmid8r4_enc(i32) != -1; } - - - EAPI ADD(eReg Rd, eReg Rn, eReg Rm, u32 RmLSL, bool S, ConditionCode CC=AL) { DECL_Id(0x00800000); diff --git a/libswirl/jit/emitter/arm32/arm_coding.h b/libswirl/jit/emitter/arm32/arm_coding.h index f0cad69a31..a70877eb7e 100644 --- 
a/libswirl/jit/emitter/arm32/arm_coding.h +++ b/libswirl/jit/emitter/arm32/arm_coding.h @@ -246,9 +246,32 @@ namespace ARM }; + static u32 ARMImmid8r4_enc(u32 imm32) + { + for (int i = 0; i <= 30; i += 2) + { + u32 immv = (imm32 << i) | (imm32 >> (32 - i)); + if (i == 0) + immv = imm32; + if (immv < 256) + { + return ((i / 2) << 8) | immv; + } + } + + return -1; + } + + static u32 ARMImmid8r4(u32 imm8r4) + { + u32 rv = ARMImmid8r4_enc(imm8r4); + verify(rv != -1); + return rv; + } - - - + static bool is_i8r4(u32 i32) + { + return ARMImmid8r4_enc(i32) != -1; + } }; diff --git a/libswirl/jit/emitter/arm32/arm_emitter.h b/libswirl/jit/emitter/arm32/arm_emitter.h index 636cedf3d2..950deab355 100644 --- a/libswirl/jit/emitter/arm32/arm_emitter.h +++ b/libswirl/jit/emitter/arm32/arm_emitter.h @@ -121,8 +121,4 @@ namespace ARM //#include "H_fp.h" - - - - - +#undef I diff --git a/libswirl/libswirl.cpp b/libswirl/libswirl.cpp index 7bfc378221..8b7896bd2b 100644 --- a/libswirl/libswirl.cpp +++ b/libswirl/libswirl.cpp @@ -31,6 +31,8 @@ #include "hw/maple/maple_if.h" #include "hw/modem/modem.h" #include "hw/holly/holly_intc.h" +#include "hw/aica/aica_mmio.h" +#include "hw/arm7/SoundCPU.h" #define fault_printf(...) 
@@ -40,14 +42,8 @@ unique_ptr virtualDreamcast; unique_ptr g_GDRDisc; -unique_ptr g_SoundCPU; -unique_ptr g_AICA; -MMIODevice* g_GDRomDrive; -static unique_ptr powerVR; - -void FlushCache(); void LoadCustom(); settings_t settings; @@ -62,10 +58,9 @@ MMIODevice* Create_BiosDevice(); MMIODevice* Create_FlashDevice(); MMIODevice* Create_NaomiDevice(SystemBus* sb); SystemBus* Create_SystemBus(); -MMIODevice* Create_PVRDevice(SystemBus* sb, ASIC* asic, SPG* spg); +MMIODevice* Create_PVRDevice(SystemBus* sb, ASIC* asic, SPG* spg, u8* vram); MMIODevice* Create_ExtDevice(); -MMIODevice* Create_AicaDevice(SystemBus* sb, ASIC* asic); -MMIODevice* Create_RTCDevice(); + #if HOST_OS==OS_WINDOWS @@ -119,10 +114,6 @@ int GetFile(char* szFileName) s32 plugins_Init() { - powerVR.reset(PowerVR::Create()); - - if (s32 rv = powerVR->Init()) - return rv; #ifndef TARGET_DISPFRAME g_GDRDisc.reset(GDRomDisc::Create()); @@ -131,41 +122,19 @@ s32 plugins_Init() return rv; #endif - g_AICA.reset(AICA::Create()); - - if (s32 rv = g_AICA->Init()) - return rv; - - g_SoundCPU.reset(SoundCPU::Create()); - - if (s32 rv = g_SoundCPU->Init()) - return rv; - return rv_ok; } void plugins_Term() { - //term all plugins - g_SoundCPU->Term(); - g_SoundCPU.reset(nullptr); - - g_AICA->Term(); - g_GDRDisc->Term(); g_GDRDisc.reset(nullptr); - - powerVR->Term(); - powerVR.reset(nullptr); } void plugins_Reset(bool Manual) { reios_reset(); - powerVR->Reset(Manual); g_GDRDisc->Reset(Manual); - g_AICA->Reset(Manual); - g_SoundCPU->Reset(Manual); //libExtDevice_Reset(Manual); } @@ -301,6 +270,9 @@ void InitSettings() settings.dynarec.idleskip = true; settings.dynarec.unstable_opt = false; settings.dynarec.safemode = true; + settings.dynarec.ScpuEnable = true; + settings.dynarec.DspEnable = true; + settings.dreamcast.cable = 3; // TV composite settings.dreamcast.region = 3; // default settings.dreamcast.broadcast = 4; // default @@ -382,6 +354,9 @@ void LoadSettings(bool game_specific) settings.dynarec.unstable_opt = 
cfgLoadBool(config_section, "Dynarec.unstable-opt", settings.dynarec.unstable_opt); settings.dynarec.safemode = cfgLoadBool(config_section, "Dynarec.safe-mode", settings.dynarec.safemode); settings.dynarec.SmcCheckLevel = (SmcCheckEnum)cfgLoadInt(config_section, "Dynarec.SmcCheckLevel", settings.dynarec.SmcCheckLevel); + settings.dynarec.ScpuEnable = cfgLoadInt(config_section, "Dynarec.ScpuEnabled", settings.dynarec.ScpuEnable); + settings.dynarec.DspEnable = cfgLoadInt(config_section, "Dynarec.DspEnabled", settings.dynarec.DspEnable); + //disable_nvmem can't be loaded, because nvmem init is before cfg load settings.dreamcast.cable = cfgLoadInt(config_section, "Dreamcast.Cable", settings.dreamcast.cable); settings.dreamcast.region = cfgLoadInt(config_section, "Dreamcast.Region", settings.dreamcast.region); @@ -516,6 +491,8 @@ void LoadCustom() void SaveSettings() { cfgSaveBool("config", "Dynarec.Enabled", settings.dynarec.Enable); + cfgSaveInt("config", "Dynarec.ScpuEnabled", settings.dynarec.ScpuEnable); + cfgSaveInt("config", "Dynarec.DspEnabled", settings.dynarec.DspEnable); cfgSaveInt("config", "Dreamcast.Cable", settings.dreamcast.cable); cfgSaveInt("config", "Dreamcast.Region", settings.dreamcast.region); cfgSaveInt("config", "Dreamcast.Broadcast", settings.dreamcast.broadcast); @@ -621,16 +598,40 @@ void* dc_run(void*) luabindings_onstart(); #endif - if (settings.dynarec.Enable) + if (settings.dynarec.Enable && sh4_cpu->setBackend(SH4BE_DYNAREC)) { - sh4_cpu->setBackend(SH4BE_DYNAREC); - printf("Using Recompiler\n"); + printf("Using MCPU Recompiler\n"); } else { sh4_cpu->setBackend(SH4BE_INTERPRETER); - printf("Using Interpreter\n"); + printf("Using MCPU Interpreter\n"); } + + auto scpu = sh4_cpu->GetA0H(A0H_SCPU); + + if (settings.dynarec.ScpuEnable && scpu->setBackend(ARM7BE_DYNAREC)) + { + printf("Using SCPU Recompiler\n"); + } + else + { + scpu->setBackend(ARM7BE_INTERPRETER); + printf("Using SCPU Interpreter\n"); + } + + auto dsp = 
sh4_cpu->GetA0H(A0H_DSP); + + if (settings.dynarec.DspEnable && dsp->setBackend(DSPBE_DYNAREC)) + { + printf("Using DSP Recompiler\n"); + } + else + { + dsp->setBackend(DSPBE_INTERPRETER); + printf("Using DSP Interpreter\n"); + } + do { reset_requested = false; @@ -659,19 +660,14 @@ void* dc_run(void*) cThread emu_thread(&dc_run, NULL); -static bool init_done; - int reicast_init(int argc, char* argv[]) { #ifdef _WIN32 setbuf(stdout, 0); setbuf(stderr, 0); #endif - if (!_vmem_reserve()) - { - printf("Failed to alloc mem\n"); - return -1; - } + + // TODO: Move vmem_reserve back here if (ParseCommandLine(argc, argv)) { return 69; @@ -705,13 +701,17 @@ void reicast_ui_loop() { } void reicast_term() { - g_GUIRenderer.release(); + g_GUIRenderer.reset(); - g_GUI.release(); + g_GUI.reset(); } struct Dreamcast_impl : VirtualDreamcast { + ~Dreamcast_impl() { + Term(); + } + void Reset() { plugins_Reset(false); @@ -719,6 +719,9 @@ struct Dreamcast_impl : VirtualDreamcast { mem_Reset((SuperH4_impl*)sh4_cpu, false); sh4_cpu->Reset(false); + + sh4_cpu->vram.Zero(); + sh4_cpu->aica_ram.Zero(); } int StartGame(const char* path) @@ -726,46 +729,6 @@ struct Dreamcast_impl : VirtualDreamcast { if (path != NULL) cfgSetVirtual("config", "image", path); - if (init_done) - { - InitSettings(); - LoadSettings(false); -#if DC_PLATFORM == DC_PLATFORM_DREAMCAST - if (!settings.bios.UseReios) -#endif - if (!LoadRomFiles(get_readonly_data_path(DATA_PATH))) - return -5; - -#if DC_PLATFORM == DC_PLATFORM_DREAMCAST - if (path == NULL) - { - // Boot BIOS - settings.imgread.LastImage[0] = 0; - g_GDRDisc->Swap(); // reload the gdrom file - } - else - { - g_GDRDisc->Swap(); // reload the gdrom file - LoadCustom(); - } -#elif DC_PLATFORM == DC_PLATFORM_NAOMI || DC_PLATFORM == DC_PLATFORM_ATOMISWAVE - if (!naomi_cart_SelectFile()) - return -6; - LoadCustom(); -#if DC_PLATFORM == DC_PLATFORM_NAOMI - mcfg_CreateNAOMIJamma(); -#elif DC_PLATFORM == DC_PLATFORM_ATOMISWAVE - 
mcfg_CreateAtomisWaveControllers(); -#endif -#endif - - Reset(); - - Resume(); - - return 0; - } - if (settings.bios.UseReios || !LoadRomFiles(get_readonly_data_path(DATA_PATH))) { #ifdef USE_REIOS @@ -783,6 +746,8 @@ struct Dreamcast_impl : VirtualDreamcast { #endif } + rend_init_renderer(sh4_cpu->vram.data); + if (plugins_Init()) return -3; @@ -802,7 +767,6 @@ struct Dreamcast_impl : VirtualDreamcast { #elif DC_PLATFORM == DC_PLATFORM_ATOMISWAVE mcfg_CreateAtomisWaveControllers(); #endif - init_done = true; Reset(); @@ -820,6 +784,12 @@ struct Dreamcast_impl : VirtualDreamcast { { sh4_cpu = SuperH4::Create(); + if (!_vmem_reserve(&sh4_cpu->vram , &sh4_cpu->aica_ram, INTERNAL_ARAM_SIZE)) + { + printf("Failed to alloc mem\n"); + return false; + } + MMIODevice* biosDevice = Create_BiosDevice(); MMIODevice* flashDevice = Create_FlashDevice(); @@ -831,18 +801,21 @@ struct Dreamcast_impl : VirtualDreamcast { #if DC_PLATFORM == DC_PLATFORM_NAOMI || DC_PLATFORM == DC_PLATFORM_ATOMISWAVE Create_NaomiDevice(systemBus) #else - (g_GDRomDrive = Create_GDRomDevice(systemBus, asic)) + Create_GDRomDevice(systemBus, asic) #endif ; SPG* spg = SPG::Create(asic); - MMIODevice* pvrDevice = Create_PVRDevice(systemBus, asic, spg); - MMIODevice* aicaDevice = Create_AicaDevice(systemBus, asic); + MMIODevice* pvrDevice = Create_PVRDevice(systemBus, asic, spg, sh4_cpu->vram.data); + DSP* dsp = DSP::Create(sh4_cpu->aica_ram.data, sh4_cpu->aica_ram.size); + AICA* aicaDevice = Create_AicaDevice(systemBus, asic, dsp, sh4_cpu->aica_ram.data, sh4_cpu->aica_ram.size); + SoundCPU* soundCPU = SoundCPU::Create(aicaDevice, sh4_cpu->aica_ram.data, sh4_cpu->aica_ram.size); + MMIODevice* mapleDevice = Create_MapleDevice(systemBus, asic); MMIODevice* extDevice = Create_ExtDevice(); // or Create_Modem(); - MMIODevice* modemDevice = extDevice; + MMIODevice* modemDevice = Create_ExtDevice(); // FIXME this is hacky #if DC_PLATFORM == DC_PLATFORM_DREAMCAST && defined(ENABLE_MODEM) modemDevice = 
Create_Modem(asic); @@ -863,7 +836,9 @@ struct Dreamcast_impl : VirtualDreamcast { sh4_cpu->SetA0Handler(A0H_MAPLE, mapleDevice); sh4_cpu->SetA0Handler(A0H_ASIC, asic); sh4_cpu->SetA0Handler(A0H_SPG, spg); - + sh4_cpu->SetA0Handler(A0H_SCPU, soundCPU); + sh4_cpu->SetA0Handler(A0H_DSP, dsp); + return sh4_cpu->Init(); } @@ -872,17 +847,20 @@ struct Dreamcast_impl : VirtualDreamcast { { sh4_cpu->Term(); - g_GDRomDrive = nullptr; - #if DC_PLATFORM != DC_PLATFORM_DREAMCAST naomi_cart_Close(); #endif plugins_Term(); - _vmem_release(); + rend_term_renderer(); + + _vmem_release(&sh4_cpu->vram, &sh4_cpu->aica_ram); mcfg_DestroyDevices(); SaveSettings(); + + delete sh4_cpu; + sh4_cpu = nullptr; } void Stop() @@ -1025,9 +1003,7 @@ struct Dreamcast_impl : VirtualDreamcast { data_ptr = data; sh4_cpu->ResetCache(); -#if FEAT_AREC == DYNAREC_JIT - FlushCache(); -#endif + sh4_cpu->GetA0H(A0H_SCPU)->InvalidateJitCache(); if (!dc_unserialize(&data_ptr, &total_size)) { @@ -1038,7 +1014,6 @@ struct Dreamcast_impl : VirtualDreamcast { } mmu_set_state(); - dsp.dyndirty = true; sh4_sched_ffts(); // TODO: save state fix this @@ -1057,7 +1032,7 @@ struct Dreamcast_impl : VirtualDreamcast { u8* address = (u8*)addr; - if (VramLockedWrite(address)) + if (VramLockedWrite(sh4_cpu->vram.data, address)) { fault_printf("VramLockedWrite!\n"); diff --git a/libswirl/libswirl.h b/libswirl/libswirl.h index b99ea46b66..6cb870045d 100644 --- a/libswirl/libswirl.h +++ b/libswirl/libswirl.h @@ -23,9 +23,6 @@ struct VirtualDreamcast { extern unique_ptr virtualDreamcast; extern unique_ptr g_GDRDisc; -extern unique_ptr g_SoundCPU; -extern unique_ptr g_AICA; -extern MMIODevice* g_GDRomDrive; // TODO: rename these diff --git a/libswirl/rend/TexCache.cpp b/libswirl/rend/TexCache.cpp index c05cc94828..01c5dd815b 100644 --- a/libswirl/rend/TexCache.cpp +++ b/libswirl/rend/TexCache.cpp @@ -126,7 +126,6 @@ void palette_update() using namespace std; vector VramLocks[VRAM_SIZE/PAGE_SIZE]; -VLockedMemory vram; // vram 
32-64b //List functions // @@ -208,11 +207,11 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user { vramlist_lock.Lock(); - vram.LockRegion(block->start, block->len); + sh4_cpu->vram.LockRegion(block->start, block->len); //TODO: Fix this for 32M wrap as well if (_nvmem_enabled() && VRAM_SIZE == 0x800000) { - vram.LockRegion(block->start + VRAM_SIZE, block->len); + sh4_cpu->vram.LockRegion(block->start + VRAM_SIZE, block->len); } vramlock_list_add(block); @@ -224,9 +223,9 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user } -bool VramLockedWrite(u8* address) +bool VramLockedWrite(u8* vram, u8* address) { - size_t offset=address-vram.data; + size_t offset=address-vram; if (offsetclear(); - vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)),PAGE_SIZE); + + sh4_cpu->vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)),PAGE_SIZE); //TODO: Fix this for 32M wrap as well if (_nvmem_enabled() && VRAM_SIZE == 0x800000) { - vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)) + VRAM_SIZE,PAGE_SIZE); + sh4_cpu->vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)) + VRAM_SIZE,PAGE_SIZE); } vramlist_lock.Unlock(); diff --git a/libswirl/rend/TexCache.h b/libswirl/rend/TexCache.h index f509206407..2b3ef01962 100644 --- a/libswirl/rend/TexCache.h +++ b/libswirl/rend/TexCache.h @@ -629,8 +629,6 @@ void vramlock_Unlock_block(vram_block* block); vram_block* vramlock_Lock_32(u32 start_offset32,u32 end_offset32,void* userdata); vram_block* vramlock_Lock_64(u32 start_offset64,u32 end_offset64,void* userdata); -void vram_LockedWrite(u32 offset64); - void DePosterize(u32* source, u32* dest, int width, int height); void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool has_alpha); -bool VramLockedWrite(u8* address); \ No newline at end of file +bool VramLockedWrite(u8* vram, u8* address); \ No newline at end of file diff --git a/libswirl/rend/d3d11/d3d11.cpp b/libswirl/rend/d3d11/d3d11.cpp index 042a0d4524..091de5b3a2 
100644 --- a/libswirl/rend/d3d11/d3d11.cpp +++ b/libswirl/rend/d3d11/d3d11.cpp @@ -105,4 +105,4 @@ struct d3d11 : Renderer }; -static auto d3d11rend = RegisterRendererBackend(rendererbackend_t{ "d3d11", "Direct3D 11", -3, [] { return (Renderer*) new ::d3d11(); } }); \ No newline at end of file +static auto d3d11rend = RegisterRendererBackend(rendererbackend_t{ "d3d11", "Direct3D 11", -3, [] (u8* vram) { return (Renderer*) new ::d3d11(); } }); \ No newline at end of file diff --git a/libswirl/rend/gl4/gl4rend.cpp b/libswirl/rend/gl4/gl4rend.cpp index 8fa08ad5f7..862d7930f5 100644 --- a/libswirl/rend/gl4/gl4rend.cpp +++ b/libswirl/rend/gl4/gl4rend.cpp @@ -598,7 +598,7 @@ static bool gles_init() return true; } -static bool RenderFrame(bool isRenderFramebuffer) +static bool RenderFrame(u8* vram, bool isRenderFramebuffer) { static int old_screen_width, old_screen_height, old_screen_scaling; if (screen_width != old_screen_width || screen_height != old_screen_height || settings.rend.ScreenScaling != old_screen_scaling) { @@ -924,7 +924,7 @@ static bool RenderFrame(bool isRenderFramebuffer) KillTex=false; if (is_rtt) - ReadRTTBuffer(); + ReadRTTBuffer(vram); else if (settings.rend.ScreenScaling != 100 || gl.swap_buffer_not_preserved) gl4_render_output_framebuffer(); @@ -936,6 +936,10 @@ void termABuffer(); struct gl4rend : Renderer { + u8* vram; + + gl4rend(u8* vram) : vram(vram) { } + bool Init() { return gles_init(); } void Resize(int w, int h) { @@ -1003,9 +1007,9 @@ struct gl4rend : Renderer gles_term(); } - bool Process(TA_context* ctx) { return ProcessFrame(ctx); } - bool RenderPVR() { return RenderFrame(false); } - bool RenderFramebuffer() { return RenderFrame(true); } + bool Process(TA_context* ctx) { return ProcessFrame(vram, ctx); } + bool RenderPVR() { return RenderFrame(vram, false); } + bool RenderFramebuffer() { return RenderFrame(vram, true); } bool RenderLastFrame() { return gl4_render_output_framebuffer(); } void Present() { os_gl_swap(); } @@ -1019,10 
+1023,10 @@ struct gl4rend : Renderer } virtual u32 GetTexture(TSP tsp, TCW tcw) { - return gl_GetTexture(tsp, tcw); + return gl_GetTexture(vram, tsp, tcw); } }; #include "hw/pvr/Renderer_if.h" -static auto gl41rend = RegisterRendererBackend(rendererbackend_t{ "gl41", "OpenGL 4.1 (Per Pixel Sort)", 2, [](){ return (Renderer*) new gl4rend(); } }); +static auto gl41rend = RegisterRendererBackend(rendererbackend_t{ "gl41", "OpenGL 4.1 (Per Pixel Sort)", 2, [](u8* vram){ return (Renderer*) new gl4rend(vram); } }); diff --git a/libswirl/rend/gles/gles.h b/libswirl/rend/gles/gles.h index 52d79c999e..d04c95499b 100644 --- a/libswirl/rend/gles/gles.h +++ b/libswirl/rend/gles/gles.h @@ -111,7 +111,7 @@ extern gl_ctx gl; extern GLuint fbTextureId; extern float fb_scale_x, fb_scale_y; -GLuint gl_GetTexture(TSP tsp,TCW tcw); +GLuint gl_GetTexture(u8* vram, TSP tsp,TCW tcw); struct text_info { u16* pdata; u32 width; @@ -122,10 +122,10 @@ enum ModifierVolumeMode { Xor, Or, Inclusion, Exclusion, ModeCount }; void gl_load_osd_resources(); void gl_free_osd_resources(); -bool ProcessFrame(TA_context* ctx); +bool ProcessFrame(u8* vram, TA_context* ctx); void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format); -text_info raw_GetTexture(TSP tsp, TCW tcw); +text_info raw_GetTexture(u8* vram, TSP tsp, TCW tcw); void killtex(); void CollectCleanup(); void DoCleanup(); @@ -135,7 +135,7 @@ s32 SetTileClip(u32 val, GLint uniform); void SetMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc); void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); -void ReadRTTBuffer(); +void ReadRTTBuffer(u8* vram); void RenderFramebuffer(); void DrawFramebuffer(float w, float h); GLuint init_output_framebuffer(int width, int height); @@ -208,6 +208,7 @@ struct TextureCacheData { TSP tsp; //dreamcast texture parameters TCW tcw; + u8* vram; GLuint texID; //gl texture u16* pData; diff --git a/libswirl/rend/gles/glesrend.cpp b/libswirl/rend/gles/glesrend.cpp index 
6b9646fa40..5f3387f336 100644 --- a/libswirl/rend/gles/glesrend.cpp +++ b/libswirl/rend/gles/glesrend.cpp @@ -1005,14 +1005,14 @@ void OSD_DRAW(bool clear_screen) g_GUI->RenderOSD(); } -bool ProcessFrame(TA_context* ctx) +bool ProcessFrame(u8* vram, TA_context* ctx) { ctx->rend_inuse.Lock(); if (KillTex) killtex(); - if (!ta_parse_vdrc(ctx)) + if (!ta_parse_vdrc(vram, ctx)) return false; CollectCleanup(); @@ -1041,7 +1041,7 @@ static void upload_vertex_indices() glCheck(); } -bool RenderFrame(bool isRenderFramebuffer) +bool RenderFrame(u8* vram, bool isRenderFramebuffer) { if (isRenderFramebuffer) { RenderFramebuffer(); @@ -1487,15 +1487,18 @@ bool RenderFrame(bool isRenderFramebuffer) KillTex=false; if (is_rtt) - ReadRTTBuffer(); + ReadRTTBuffer(vram); else if (settings.rend.ScreenScaling != 100 || gl.swap_buffer_not_preserved) render_output_framebuffer(); return !is_rtt; } -struct glesrend : Renderer +struct glesrend final : Renderer { + u8* vram; + glesrend(u8* vram) : vram(vram) { } + bool Init() { return gles_init(); } void SetFBScale(float x, float y) { @@ -1511,9 +1514,9 @@ struct glesrend : Renderer gles_term(); } - bool Process(TA_context* ctx) { return ProcessFrame(ctx); } - bool RenderPVR() { return RenderFrame(false); } - bool RenderFramebuffer() { return RenderFrame(true); } + bool Process(TA_context* ctx) { return ProcessFrame(vram, ctx); } + bool RenderPVR() { return RenderFrame(vram, false); } + bool RenderFramebuffer() { return RenderFrame(vram, true); } bool RenderLastFrame() { return render_output_framebuffer(); } void Present() { os_gl_swap(); glViewport(0, 0, screen_width, screen_height); } @@ -1537,7 +1540,7 @@ struct glesrend : Renderer } virtual u32 GetTexture(TSP tsp, TCW tcw) { - return gl_GetTexture(tsp, tcw); + return gl_GetTexture(vram, tsp, tcw); } }; @@ -1703,4 +1706,4 @@ GLuint loadPNG(const string& fname, int &width, int &height) #include "hw/pvr/Renderer_if.h" -static auto gles2rend = RegisterRendererBackend(rendererbackend_t{ 
"gles", "OpenGL ES 2/PC41 (Per Triangle Sort)", 1, []() { return (Renderer*) new glesrend(); } }); \ No newline at end of file +static auto gles2rend = RegisterRendererBackend(rendererbackend_t{ "gles", "OpenGL ES 2/PC41 (Per Triangle Sort)", 1, [](u8* vram) { return (Renderer*) new glesrend(vram); } }); \ No newline at end of file diff --git a/libswirl/rend/gles/glestex.cpp b/libswirl/rend/gles/glestex.cpp index ed9fd4ea92..fd86bb3abb 100644 --- a/libswirl/rend/gles/glestex.cpp +++ b/libswirl/rend/gles/glestex.cpp @@ -472,7 +472,7 @@ bool TextureCacheData::Delete() map TexCache; typedef map::iterator TexCacheIter; -TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw); +TextureCacheData *getTextureCacheData(u8* vram, TSP tsp, TCW tcw); void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) { @@ -554,7 +554,7 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) glViewport(0, 0, fbw, fbh); // TODO CLIP_X/Y min? } -void ReadRTTBuffer() { +void ReadRTTBuffer(u8* vram) { u32 w = pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1; u32 h = pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1; @@ -585,7 +585,6 @@ void ReadRTTBuffer() { } } } - vram.UnLockRegion(0, 2 * vram.size); glPixelStorei(GL_PACK_ALIGNMENT, 1); u16 *dst = (u16 *)&vram[tex_addr]; @@ -640,19 +639,6 @@ void ReadRTTBuffer() { dst += (stride - w * 2) / 2; } } - - // Restore VRAM locks - for (TexCacheIter i = TexCache.begin(); i != TexCache.end(); i++) - { - if (i->second.lock_block != NULL) { - vram.LockRegion(i->second.sa_tex, i->second.sa + i->second.size - i->second.sa_tex); - - //TODO: Fix this for 32M wrap as well - if (_nvmem_enabled() && VRAM_SIZE == 0x800000) { - vram.LockRegion(i->second.sa_tex + VRAM_SIZE, i->second.sa + i->second.size - i->second.sa_tex); - } - } - } } else { @@ -684,7 +670,7 @@ void ReadRTTBuffer() { for (tsp.TexU = 0; tsp.TexU <= 7 && (8 << tsp.TexU) < w; tsp.TexU++); for (tsp.TexV = 0; tsp.TexV <= 7 && (8 << tsp.TexV) < h; tsp.TexV++); - TextureCacheData 
*texture_data = getTextureCacheData(tsp, tcw); + TextureCacheData *texture_data = getTextureCacheData(vram, tsp, tcw); if (texture_data->texID != 0) glcache.DeleteTextures(1, &texture_data->texID); else @@ -711,7 +697,7 @@ const TSP TSPTextureCacheMask = { { 7, 7 } }; // TexAddr : 0x1FFFFF, Reserved : 0, StrideSel : 0, ScanOrder : 1, PixelFmt : 7, VQ_Comp : 1, MipMapped : 1 const TCW TCWTextureCacheMask = { { 0x1FFFFF, 0, 0, 1, 7, 1, 1 } }; -TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw) { +TextureCacheData *getTextureCacheData(u8* vram, TSP tsp, TCW tcw) { u64 key = tsp.full & TSPTextureCacheMask.full; if (tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8) // Paletted textures have a palette selection that must be part of the key @@ -737,17 +723,18 @@ TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw) { tf->tsp = tsp; tf->tcw = tcw; + tf->vram = vram; } return tf; } -GLuint gl_GetTexture(TSP tsp, TCW tcw) +GLuint gl_GetTexture(u8* vram, TSP tsp, TCW tcw) { TexCacheLookups++; //lookup texture - TextureCacheData* tf = getTextureCacheData(tsp, tcw); + TextureCacheData* tf = getTextureCacheData(vram, tsp, tcw); if (tf->texID == 0) tf->Create(true); @@ -777,7 +764,7 @@ GLuint gl_GetTexture(TSP tsp, TCW tcw) } -text_info raw_GetTexture(TSP tsp, TCW tcw) +text_info raw_GetTexture(u8* vram, TSP tsp, TCW tcw) { text_info rv = { 0 }; @@ -797,6 +784,7 @@ text_info raw_GetTexture(TSP tsp, TCW tcw) tf->tsp = tsp; tf->tcw = tcw; + tf->vram = vram; tf->Create(false); } diff --git a/libswirl/rend/norend/norend.cpp b/libswirl/rend/norend/norend.cpp index 8d411e5ff1..b5a9a05fc6 100644 --- a/libswirl/rend/norend/norend.cpp +++ b/libswirl/rend/norend/norend.cpp @@ -32,6 +32,6 @@ struct norend : Renderer #include "hw/pvr/Renderer_if.h" -Renderer* rend_norend() { return new ::norend(); } +Renderer* rend_norend(u8* vram) { return new ::norend(); } static auto norend = RegisterRendererBackend(rendererbackend_t{ "none", "No PVR Rendering", -2, rend_norend }); \ No 
newline at end of file diff --git a/libswirl/rend/soft/slowsoftrend.cpp b/libswirl/rend/soft/slowsoftrend.cpp index 08932cd344..59ee095e16 100644 --- a/libswirl/rend/soft/slowsoftrend.cpp +++ b/libswirl/rend/soft/slowsoftrend.cpp @@ -147,9 +147,10 @@ struct IPs2 #define TPL_PRMS_pixel #define TPL_PRMS_triangle +struct slowsoftrend; // -typedef void(*RendtriangleFn)(PolyParam* pp, int vertex_offset, const Vertex& v1, const Vertex& v2, const Vertex& v3, u32* colorBuffer, RECT* area); +typedef void(slowsoftrend::*RendtriangleFn)(PolyParam* pp, int vertex_offset, const Vertex& v1, const Vertex& v2, const Vertex& v3, u32* colorBuffer, RECT* area); static RendtriangleFn RendtriangleFns[3][2][2][2][4][2]; TPL_DECL_pixel @@ -275,146 +276,151 @@ static void PixelFlush(PolyParam* pp, text_info* texture, float x, float y, u8* } } -//u32 nok,fok; -TPL_DECL_triangle -static void Rendtriangle(PolyParam* pp, int vertex_offset, const Vertex& v1, const Vertex& v2, const Vertex& v3, u32* colorBuffer, RECT* area) + +#if HOST_OS == OS_WINDOWS +static BITMAPINFOHEADER bi = { sizeof(BITMAPINFOHEADER), 0, 0, 1, 32, BI_RGB }; +#endif + +bool gles_init(); + +struct slowsoftrend : Renderer { - text_info texture = { 0 }; + u8* vram; - if (pp_Texture) { + slowsoftrend(u8* vram) : vram(vram) { } -#pragma omp critical (texture_lookup) - { - texture = raw_GetTexture(pp->tsp, pp->tcw); - } + //u32 nok,fok; + TPL_DECL_triangle + void Rendtriangle(PolyParam* pp, int vertex_offset, const Vertex& v1, const Vertex& v2, const Vertex& v3, u32* colorBuffer, RECT* area) + { + text_info texture = { 0 }; - } + if (pp_Texture) { - const int stride_bytes = STRIDE_PIXEL_OFFSET * 4; - //Plane equation +#pragma omp critical (texture_lookup) + { + texture = raw_GetTexture(vram, pp->tsp, pp->tcw); + } + } - const float Y1 = v1.y; - const float Y2 = v2.y; - const float Y3 = v3.y; + const int stride_bytes = STRIDE_PIXEL_OFFSET * 4; + //Plane equation - const float X1 = v1.x; - const float X2 = v2.x; - const float X3 
= v3.x; - int sgn = 1; + const float Y1 = v1.y; + const float Y2 = v2.y; + const float Y3 = v3.y; - // Deltas - { - //area: (X1-X3)*(Y2-Y3)-(Y1-Y3)*(X2-X3) - float area = ((X1 - X3) * (Y2 - Y3) - (Y1 - Y3) * (X2 - X3)); + const float X1 = v1.x; + const float X2 = v2.x; + const float X3 = v3.x; - if (area > 0) - sgn = -1; + int sgn = 1; - if (pp->isp.CullMode != 0) { - float abs_area = fabsf(area); + // Deltas + { + //area: (X1-X3)*(Y2-Y3)-(Y1-Y3)*(X2-X3) + float area = ((X1 - X3) * (Y2 - Y3) - (Y1 - Y3) * (X2 - X3)); - if (abs_area < FPU_CULL_VAL) - return; + if (area > 0) + sgn = -1; - if (pp->isp.CullMode >= 2) { - u32 mode = vertex_offset ^ pp->isp.CullMode & 1; + if (pp->isp.CullMode != 0) { + float abs_area = fabsf(area); - if ( - (mode == 0 && area < 0) || - (mode == 1 && area > 0)) { + if (abs_area < FPU_CULL_VAL) return; + + if (pp->isp.CullMode >= 2) { + u32 mode = vertex_offset ^ pp->isp.CullMode & 1; + + if ( + (mode == 0 && area < 0) || + (mode == 1 && area > 0)) { + return; + } } } } - } - const float DX12 = sgn * (X1 - X2); - const float DX23 = sgn * (X2 - X3); - const float DX31 = sgn * (X3 - X1); + const float DX12 = sgn * (X1 - X2); + const float DX23 = sgn * (X2 - X3); + const float DX31 = sgn * (X3 - X1); - const float DY12 = sgn * (Y1 - Y2); - const float DY23 = sgn * (Y2 - Y3); - const float DY31 = sgn * (Y3 - Y1); + const float DY12 = sgn * (Y1 - Y2); + const float DY23 = sgn * (Y2 - Y3); + const float DY31 = sgn * (Y3 - Y1); - // Bounding rectangle - int minx = iround(mmin(X1, X2, X3, area->left)); - int miny = iround(mmin(Y1, Y2, Y3, area->top)); + // Bounding rectangle + int minx = iround(mmin(X1, X2, X3, area->left)); + int miny = iround(mmin(Y1, Y2, Y3, area->top)); - int spanx = iround(mmax(X1, X2, X3, area->right-1)) - minx + 1; - int spany = iround(mmax(Y1, Y2, Y3, area->bottom-1)) - miny + 1; + int spanx = iround(mmax(X1, X2, X3, area->right - 1)) - minx + 1; + int spany = iround(mmax(Y1, Y2, Y3, area->bottom - 1)) - miny + 1; - 
//Inside scissor area? - if (spanx < 0 || spany < 0) - return; + //Inside scissor area? + if (spanx < 0 || spany < 0) + return; - // Half-edge constants - float C1 = DY12 * X1 - DX12 * Y1; - float C2 = DY23 * X2 - DX23 * Y2; - float C3 = DY31 * X3 - DX31 * Y3; + // Half-edge constants + float C1 = DY12 * X1 - DX12 * Y1; + float C2 = DY23 * X2 - DX23 * Y2; + float C3 = DY31 * X3 - DX31 * Y3; - float hs12 = C1 + DX12 * miny - DY12 * minx; - float hs23 = C2 + DX23 * miny - DY23 * minx; - float hs31 = C3 + DX31 * miny - DY31 * minx; + float hs12 = C1 + DX12 * miny - DY12 * minx; + float hs23 = C2 + DX23 * miny - DY23 * minx; + float hs31 = C3 + DX31 * miny - DY31 * minx; - - u8* cb_y = (u8*)colorBuffer; - cb_y += miny * stride_bytes + minx * 4; - DECL_ALIGN(64) IPs2 ip; + u8* cb_y = (u8*)colorBuffer; + cb_y += miny * stride_bytes + minx * 4; - ip.Setup(pp, &texture, v1, v2, v3); + DECL_ALIGN(64) IPs2 ip; + ip.Setup(pp, &texture, v1, v2, v3); - float y_ps = miny; - float minx_ps = minx; - - // Loop through blocks - for (int y = spany; y > 0; y -= 1) - { - float Xhs12 = hs12; - float Xhs23 = hs23; - float Xhs31 = hs31; - u8* cb_x = cb_y; - float x_ps = minx_ps; - for (int x = spanx; x > 0; x -= 1) - { - Xhs12 -= DY12; - Xhs23 -= DY23; - Xhs31 -= DY31; - // Corners of block - bool inTriangle = EvalHalfSpaceAll(Xhs12, Xhs23, Xhs31); + float y_ps = miny; + float minx_ps = minx; - // Skip block when outside an edge - if (inTriangle) + // Loop through blocks + for (int y = spany; y > 0; y -= 1) + { + float Xhs12 = hs12; + float Xhs23 = hs23; + float Xhs31 = hs31; + u8* cb_x = cb_y; + float x_ps = minx_ps; + for (int x = spanx; x > 0; x -= 1) { - PixelFlush TPL_PRMS_pixel (pp, &texture, x_ps, y_ps, cb_x, ip); + Xhs12 -= DY12; + Xhs23 -= DY23; + Xhs31 -= DY31; + + // Corners of block + bool inTriangle = EvalHalfSpaceAll(Xhs12, Xhs23, Xhs31); + + // Skip block when outside an edge + if (inTriangle) + { + PixelFlush TPL_PRMS_pixel(pp, &texture, x_ps, y_ps, cb_x, ip); + } + + cb_x 
+= 4; + x_ps = x_ps + 1; } - - cb_x += 4; - x_ps = x_ps + 1; + next_y: + hs12 += DX12; + hs23 += DX23; + hs31 += DX31; + cb_y += stride_bytes; + y_ps = y_ps + 1; } - next_y: - hs12 += DX12; - hs23 += DX23; - hs31 += DX31; - cb_y += stride_bytes; - y_ps = y_ps + 1; } -} - -#if HOST_OS == OS_WINDOWS -static BITMAPINFOHEADER bi = { sizeof(BITMAPINFOHEADER), 0, 0, 1, 32, BI_RGB }; -#endif - -bool gles_init(); -struct slowsoftrend : Renderer -{ void SetFBScale(float x, float y) { fb_scale_x = x; @@ -428,7 +434,7 @@ struct slowsoftrend : Renderer ctx->rend_inuse.Lock(); - if (!ta_parse_vdrc(ctx)) + if (!ta_parse_vdrc(vram, ctx)) return false; return true; @@ -455,7 +461,7 @@ struct slowsoftrend : Renderer //// RendtriangleFn fn = RendtriangleFns[alpha_mode][params[i].tsp.UseAlpha][params[i].pcw.Texture][params[i].tsp.IgnoreTexA][params[i].tsp.ShadInstr][params[i].pcw.Offset]; - fn(¶ms[i], v, verts[poly_idx[v]], verts[poly_idx[v + 1]], verts[poly_idx[v + 2]], render_buffer, area); + (this->*fn)(¶ms[i], v, verts[poly_idx[v]], verts[poly_idx[v + 1]], verts[poly_idx[v + 2]], render_buffer, area); } } } @@ -542,6 +548,7 @@ struct slowsoftrend : Renderer decoded_colors[2][c] = (REP_16((c >> 0) % 16) << 24) | (REP_16((c >> 12) % 16) << 16) | (REP_16((c >> 8) % 16) << 8) | (REP_16((c >> 4) % 16) << 0); } +#define Rendtriangle slowsoftrend::Rendtriangle { RendtriangleFns[0][0][1][0][0][0] = &Rendtriangle<0, 0, 1, 0, 0, 0>; RendtriangleFns[0][0][1][0][0][1] = &Rendtriangle<0, 0, 1, 0, 0, 1>; @@ -738,6 +745,7 @@ struct slowsoftrend : Renderer RendtriangleFns[2][1][0][1][3][0] = &Rendtriangle<2, 1, 0, 1, 3, 0>; RendtriangleFns[2][1][0][1][3][1] = &Rendtriangle<2, 1, 0, 1, 3, 1>; } +#undef Rendtriangle return true; } @@ -806,8 +814,8 @@ struct slowsoftrend : Renderer } }; -Renderer* rend_slowsoftrend() { - return new(_mm_malloc(sizeof(slowsoftrend), 32)) ::slowsoftrend(); +Renderer* rend_slowsoftrend(u8* vram) { + return new(_mm_malloc(sizeof(slowsoftrend), 32)) ::slowsoftrend(vram); 
} static auto slowsoftrend = RegisterRendererBackend(rendererbackend_t{ "slow", "Slow Software Renderer", 0, rend_slowsoftrend }); diff --git a/libswirl/rend/soft/softrend.cpp b/libswirl/rend/soft/softrend.cpp index 7b598bc39b..c608b0a0c1 100644 --- a/libswirl/rend/soft/softrend.cpp +++ b/libswirl/rend/soft/softrend.cpp @@ -248,9 +248,10 @@ struct IPs #define TPL_PRMS_pixel(useoldmsk) #define TPL_PRMS_triangle +struct softrend; // -typedef void(*RendtriangleFn)(PolyParam* pp, int vertex_offset, const Vertex &v1, const Vertex &v2, const Vertex &v3, u32* colorBuffer, RECT* area); +typedef void(softrend::*RendtriangleFn)(PolyParam* pp, int vertex_offset, const Vertex &v1, const Vertex &v2, const Vertex &v3, u32* colorBuffer, RECT* area); RendtriangleFn RendtriangleFns[3][2][2][2][4][2]; @@ -561,263 +562,267 @@ static void PixelFlush(PolyParam* pp, text_info* texture, __m128 x, __m128 y, u8 } //u32 nok,fok; -TPL_DECL_triangle -static void Rendtriangle(PolyParam* pp, int vertex_offset, const Vertex &v1, const Vertex &v2, const Vertex &v3, u32* colorBuffer, RECT* area) -{ - text_info texture = { 0 }; - if (pp_Texture) { +#if HOST_OS == OS_WINDOWS + BITMAPINFOHEADER bi = { sizeof(BITMAPINFOHEADER), 0, 0, 1, 32, BI_RGB }; +#endif - #pragma omp critical (texture_lookup) - { - texture = raw_GetTexture(pp->tsp, pp->tcw); - } - } +struct softrend : Renderer +{ + u8* vram; + softrend(u8* vram) : vram(vram) { } + + TPL_DECL_triangle + void Rendtriangle(PolyParam* pp, int vertex_offset, const Vertex& v1, const Vertex& v2, const Vertex& v3, u32* colorBuffer, RECT* area) + { + text_info texture = { 0 }; - const int stride_bytes = STRIDE_PIXEL_OFFSET * 4; - //Plane equation + if (pp_Texture) { +#pragma omp critical (texture_lookup) + { + texture = raw_GetTexture(vram, pp->tsp, pp->tcw); + } - // 28.4 fixed-point coordinates - const float Y1 = v1.y;// iround(16.0f * v1.y); - const float Y2 = v2.y;// iround(16.0f * v2.y); - const float Y3 = v3.y;// iround(16.0f * v3.y); + } - const 
float X1 = v1.x;// iround(16.0f * v1.x); - const float X2 = v2.x;// iround(16.0f * v2.x); - const float X3 = v3.x;// iround(16.0f * v3.x); + const int stride_bytes = STRIDE_PIXEL_OFFSET * 4; + //Plane equation - int sgn = 1; - // Deltas - { - //area: (X1-X3)*(Y2-Y3)-(Y1-Y3)*(X2-X3) - float area = ((X1 - X3)*(Y2 - Y3) - (Y1 - Y3)*(X2 - X3)); + // 28.4 fixed-point coordinates + const float Y1 = v1.y;// iround(16.0f * v1.y); + const float Y2 = v2.y;// iround(16.0f * v2.y); + const float Y3 = v3.y;// iround(16.0f * v3.y); + + const float X1 = v1.x;// iround(16.0f * v1.x); + const float X2 = v2.x;// iround(16.0f * v2.x); + const float X3 = v3.x;// iround(16.0f * v3.x); - if (area>0) - sgn = -1; + int sgn = 1; - if (pp->isp.CullMode != 0) { - float abs_area = fabsf(area); + // Deltas + { + //area: (X1-X3)*(Y2-Y3)-(Y1-Y3)*(X2-X3) + float area = ((X1 - X3) * (Y2 - Y3) - (Y1 - Y3) * (X2 - X3)); - if (abs_area < FPU_CULL_VAL) - return; + if (area > 0) + sgn = -1; - if (pp->isp.CullMode >= 2) { - u32 mode = vertex_offset ^ pp->isp.CullMode & 1; + if (pp->isp.CullMode != 0) { + float abs_area = fabsf(area); - if ( - (mode == 0 && area < 0) || - (mode == 1 && area > 0)) { + if (abs_area < FPU_CULL_VAL) return; + + if (pp->isp.CullMode >= 2) { + u32 mode = vertex_offset ^ pp->isp.CullMode & 1; + + if ( + (mode == 0 && area < 0) || + (mode == 1 && area > 0)) { + return; + } } } } - } - const float DX12 = sgn*(X1 - X2); - const float DX23 = sgn*(X2 - X3); - const float DX31 = sgn*(X3 - X1); + const float DX12 = sgn * (X1 - X2); + const float DX23 = sgn * (X2 - X3); + const float DX31 = sgn * (X3 - X1); - const float DY12 = sgn*(Y1 - Y2); - const float DY23 = sgn*(Y2 - Y3); - const float DY31 = sgn*(Y3 - Y1); + const float DY12 = sgn * (Y1 - Y2); + const float DY23 = sgn * (Y2 - Y3); + const float DY31 = sgn * (Y3 - Y1); - // Fixed-point deltas - const float FDX12 = DX12;// << 4; - const float FDX23 = DX23;// << 4; - const float FDX31 = DX31;// << 4; + // Fixed-point deltas + const 
float FDX12 = DX12;// << 4; + const float FDX23 = DX23;// << 4; + const float FDX31 = DX31;// << 4; - const float FDY12 = DY12;// << 4; - const float FDY23 = DY23;// << 4; - const float FDY31 = DY31;// << 4; + const float FDY12 = DY12;// << 4; + const float FDY23 = DY23;// << 4; + const float FDY31 = DY31;// << 4; - // Block size, standard 4x4 (must be power of two) - const int q = 4; + // Block size, standard 4x4 (must be power of two) + const int q = 4; - // Bounding rectangle - int minx = iround(mmin(X1, X2, X3, area->left));// +0xF) >> 4; - int miny = iround(mmin(Y1, Y2, Y3, area->top));// +0xF) >> 4; + // Bounding rectangle + int minx = iround(mmin(X1, X2, X3, area->left));// +0xF) >> 4; + int miny = iround(mmin(Y1, Y2, Y3, area->top));// +0xF) >> 4; - // Start in corner of block - minx &= ~(q - 1); - miny &= ~(q - 1); + // Start in corner of block + minx &= ~(q - 1); + miny &= ~(q - 1); - int spanx = iround(mmax(X1 + 0.5f, X2 + 0.5f, X3 + 0.5f, area->right)) - minx; - int spany = iround(mmax(Y1 + 0.5f, Y2 + 0.5f, Y3 + 0.5f, area->bottom)) - miny; + int spanx = iround(mmax(X1 + 0.5f, X2 + 0.5f, X3 + 0.5f, area->right)) - minx; + int spany = iround(mmax(Y1 + 0.5f, Y2 + 0.5f, Y3 + 0.5f, area->bottom)) - miny; - //Inside scissor area? - if (spanx < 0 || spany < 0) - return; + //Inside scissor area? 
+ if (spanx < 0 || spany < 0) + return; - // Half-edge constants - float C1 = DY12 * X1 - DX12 * Y1; - float C2 = DY23 * X2 - DX23 * Y2; - float C3 = DY31 * X3 - DX31 * Y3; + // Half-edge constants + float C1 = DY12 * X1 - DX12 * Y1; + float C2 = DY23 * X2 - DX23 * Y2; + float C3 = DY31 * X3 - DX31 * Y3; - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - float MAX_12, MAX_23, MAX_31, MIN_12, MIN_23, MIN_31; + float MAX_12, MAX_23, MAX_31, MIN_12, MIN_23, MIN_31; - PlaneMinMax(MIN_12, MAX_12, DX12, DY12, q); - PlaneMinMax(MIN_23, MAX_23, DX23, DY23, q); - PlaneMinMax(MIN_31, MAX_31, DX31, DY31, q); + PlaneMinMax(MIN_12, MAX_12, DX12, DY12, q); + PlaneMinMax(MIN_23, MAX_23, DX23, DY23, q); + PlaneMinMax(MIN_31, MAX_31, DX31, DY31, q); - const float FDqX12 = FDX12 * q; - const float FDqX23 = FDX23 * q; - const float FDqX31 = FDX31 * q; + const float FDqX12 = FDX12 * q; + const float FDqX23 = FDX23 * q; + const float FDqX31 = FDX31 * q; - const float FDqY12 = FDY12 * q; - const float FDqY23 = FDY23 * q; - const float FDqY31 = FDY31 * q; + const float FDqY12 = FDY12 * q; + const float FDqY23 = FDY23 * q; + const float FDqY31 = FDY31 * q; - const float FDX12mq = FDX12 + FDY12*q; - const float FDX23mq = FDX23 + FDY23*q; - const float FDX31mq = FDX31 + FDY31*q; + const float FDX12mq = FDX12 + FDY12 * q; + const float FDX23mq = FDX23 + FDY23 * q; + const float FDX31mq = FDX31 + FDY31 * q; - float hs12 = C1 + FDX12 * (miny + 0.5f) - FDY12 * (minx + 0.5f) + FDqY12 - MIN_12; - float hs23 = C2 + FDX23 * (miny + 0.5f) - FDY23 * (minx + 0.5f) + FDqY23 - MIN_23; - float hs31 = C3 + FDX31 * (miny + 0.5f) - FDY31 * (minx + 0.5f) + FDqY31 - MIN_31; + float hs12 = C1 + FDX12 * 
(miny + 0.5f) - FDY12 * (minx + 0.5f) + FDqY12 - MIN_12; + float hs23 = C2 + FDX23 * (miny + 0.5f) - FDY23 * (minx + 0.5f) + FDqY23 - MIN_23; + float hs31 = C3 + FDX31 * (miny + 0.5f) - FDY31 * (minx + 0.5f) + FDqY31 - MIN_31; - MAX_12 -= MIN_12; - MAX_23 -= MIN_23; - MAX_31 -= MIN_31; + MAX_12 -= MIN_12; + MAX_23 -= MIN_23; + MAX_31 -= MIN_31; - float C1_pm = MIN_12; - float C2_pm = MIN_23; - float C3_pm = MIN_31; + float C1_pm = MIN_12; + float C2_pm = MIN_23; + float C3_pm = MIN_31; - u8* cb_y = (u8*)colorBuffer; - cb_y += miny*stride_bytes + minx*(q * 4); + u8* cb_y = (u8*)colorBuffer; + cb_y += miny * stride_bytes + minx * (q * 4); - DECL_ALIGN(64) IPs ip; + DECL_ALIGN(64) IPs ip; - ip.Setup(pp, &texture, v1, v2, v3, minx, miny, q); + ip.Setup(pp, &texture, v1, v2, v3, minx, miny, q); - __m128 y_ps = _mm_broadcast_float(miny); - __m128 minx_ps = _mm_load_scaled_float(minx - q, 1); - static DECL_ALIGN(16) float ones_ps[4] = { 1, 1, 1, 1 }; - static DECL_ALIGN(16) float q_ps[4] = { q, q, q, q }; + __m128 y_ps = _mm_broadcast_float(miny); + __m128 minx_ps = _mm_load_scaled_float(minx - q, 1); + static DECL_ALIGN(16) float ones_ps[4] = { 1, 1, 1, 1 }; + static DECL_ALIGN(16) float q_ps[4] = { q, q, q, q }; - // Loop through blocks - for (int y = spany; y > 0; y -= q) - { - float Xhs12 = hs12; - float Xhs23 = hs23; - float Xhs31 = hs31; - u8* cb_x = cb_y; - __m128 x_ps = minx_ps; - for (int x = spanx; x > 0; x -= q) + // Loop through blocks + for (int y = spany; y > 0; y -= q) { - Xhs12 -= FDqY12; - Xhs23 -= FDqY23; - Xhs31 -= FDqY31; - x_ps = _mm_add_ps(x_ps, *(__m128*)q_ps); - - // Corners of block - bool any = EvalHalfSpaceFAny(Xhs12, Xhs23, Xhs31); - - // Skip block when outside an edge - if (!any) + float Xhs12 = hs12; + float Xhs23 = hs23; + float Xhs31 = hs31; + u8* cb_x = cb_y; + __m128 x_ps = minx_ps; + for (int x = spanx; x > 0; x -= q) { - cb_x += q*q * 4; - continue; - } + Xhs12 -= FDqY12; + Xhs23 -= FDqY23; + Xhs31 -= FDqY31; + x_ps = _mm_add_ps(x_ps, 
*(__m128*)q_ps); - bool all = EvalHalfSpaceFAll(Xhs12, Xhs23, Xhs31, MAX_12, MAX_23, MAX_31); + // Corners of block + bool any = EvalHalfSpaceFAny(Xhs12, Xhs23, Xhs31); - // Accept whole block when totally covered - if (all) - { - __m128 yl_ps = y_ps; - for (int iy = q; iy > 0; iy--) + // Skip block when outside an edge + if (!any) { - PixelFlush TPL_PRMS_pixel(false) (pp, &texture, x_ps, yl_ps, cb_x, x_ps, ip); - yl_ps = _mm_add_ps(yl_ps, *(__m128*)ones_ps); - cb_x += sizeof(__m128); + cb_x += q * q * 4; + continue; } - } - else // Partially covered block - { - float CY1 = C1_pm + Xhs12; - float CY2 = C2_pm + Xhs23; - float CY3 = C3_pm + Xhs31; - __m128 pfdx12 = _mm_broadcast_float(FDX12); - __m128 pfdx23 = _mm_broadcast_float(FDX23); - __m128 pfdx31 = _mm_broadcast_float(FDX31); + bool all = EvalHalfSpaceFAll(Xhs12, Xhs23, Xhs31, MAX_12, MAX_23, MAX_31); - __m128 pcy1 = _mm_load_scaled_float(CY1, -FDY12); - __m128 pcy2 = _mm_load_scaled_float(CY2, -FDY23); - __m128 pcy3 = _mm_load_scaled_float(CY3, -FDY31); + // Accept whole block when totally covered + if (all) + { + __m128 yl_ps = y_ps; + for (int iy = q; iy > 0; iy--) + { + PixelFlush TPL_PRMS_pixel(false) (pp, &texture, x_ps, yl_ps, cb_x, x_ps, ip); + yl_ps = _mm_add_ps(yl_ps, *(__m128*)ones_ps); + cb_x += sizeof(__m128); + } + } + else // Partially covered block + { + float CY1 = C1_pm + Xhs12; + float CY2 = C2_pm + Xhs23; + float CY3 = C3_pm + Xhs31; - __m128 pzero = _mm_setzero_ps(); + __m128 pfdx12 = _mm_broadcast_float(FDX12); + __m128 pfdx23 = _mm_broadcast_float(FDX23); + __m128 pfdx31 = _mm_broadcast_float(FDX31); - //bool ok=false; - __m128 yl_ps = y_ps; + __m128 pcy1 = _mm_load_scaled_float(CY1, -FDY12); + __m128 pcy2 = _mm_load_scaled_float(CY2, -FDY23); + __m128 pcy3 = _mm_load_scaled_float(CY3, -FDY31); - for (int iy = q; iy > 0; iy--) - { - __m128 mask1 = _mm_cmple_ps(pcy1, pzero); - __m128 mask2 = _mm_cmple_ps(pcy2, pzero); - __m128 mask3 = _mm_cmple_ps(pcy3, pzero); - __m128 summary = 
_mm_or_ps(mask3, _mm_or_ps(mask2, mask1)); + __m128 pzero = _mm_setzero_ps(); - __m128i a = _mm_cmpeq_epi32((__m128i&)summary, (__m128i&)pzero); - int msk = _mm_movemask_ps((__m128&)a); + //bool ok=false; + __m128 yl_ps = y_ps; - if (msk != 0) + for (int iy = q; iy > 0; iy--) { - if (msk != 0xF) - PixelFlush TPL_PRMS_pixel(true) (pp, &texture, x_ps, yl_ps, cb_x, *(__m128*)&a, ip); - else - PixelFlush TPL_PRMS_pixel(false) (pp, &texture, x_ps, yl_ps, cb_x, *(__m128*)&a, ip); + __m128 mask1 = _mm_cmple_ps(pcy1, pzero); + __m128 mask2 = _mm_cmple_ps(pcy2, pzero); + __m128 mask3 = _mm_cmple_ps(pcy3, pzero); + __m128 summary = _mm_or_ps(mask3, _mm_or_ps(mask2, mask1)); + + __m128i a = _mm_cmpeq_epi32((__m128i&)summary, (__m128i&)pzero); + int msk = _mm_movemask_ps((__m128&)a); + + if (msk != 0) + { + if (msk != 0xF) + PixelFlush TPL_PRMS_pixel(true) (pp, &texture, x_ps, yl_ps, cb_x, *(__m128*) & a, ip); + else + PixelFlush TPL_PRMS_pixel(false) (pp, &texture, x_ps, yl_ps, cb_x, *(__m128*) & a, ip); + } + + yl_ps = _mm_add_ps(yl_ps, *(__m128*)ones_ps); + cb_x += sizeof(__m128); + + //CY1 += FDX12mq; + //CY2 += FDX23mq; + //CY3 += FDX31mq; + pcy1 = _mm_add_ps(pcy1, pfdx12); + pcy2 = _mm_add_ps(pcy2, pfdx23); + pcy3 = _mm_add_ps(pcy3, pfdx31); } - - yl_ps = _mm_add_ps(yl_ps, *(__m128*)ones_ps); - cb_x += sizeof(__m128); - - //CY1 += FDX12mq; - //CY2 += FDX23mq; - //CY3 += FDX31mq; - pcy1 = _mm_add_ps(pcy1, pfdx12); - pcy2 = _mm_add_ps(pcy2, pfdx23); - pcy3 = _mm_add_ps(pcy3, pfdx31); - } - /* - if (!ok) - { - nok++; + /* + if (!ok) + { + nok++; + } + else + { + fok++; + }*/ } - else - { - fok++; - }*/ } + next_y: + hs12 += FDqX12; + hs23 += FDqX23; + hs31 += FDqX31; + cb_y += stride_bytes * q; + y_ps = _mm_add_ps(y_ps, *(__m128*)q_ps); } - next_y: - hs12 += FDqX12; - hs23 += FDqX23; - hs31 += FDqX31; - cb_y += stride_bytes*q; - y_ps = _mm_add_ps(y_ps, *(__m128*)q_ps); } -} -#if HOST_OS == OS_WINDOWS - BITMAPINFOHEADER bi = { sizeof(BITMAPINFOHEADER), 0, 0, 1, 32, BI_RGB }; 
-#endif - - -struct softrend : Renderer -{ void SetFBScale(float x, float y) { fb_scale_x = x; @@ -831,7 +836,7 @@ struct softrend : Renderer ctx->rend_inuse.Lock(); - if (!ta_parse_vdrc(ctx)) + if (!ta_parse_vdrc(vram, ctx)) return false; return true; @@ -858,7 +863,7 @@ struct softrend : Renderer //// RendtriangleFn fn = RendtriangleFns[alpha_mode][params[i].tsp.UseAlpha][params[i].pcw.Texture][params[i].tsp.IgnoreTexA][params[i].tsp.ShadInstr][params[i].pcw.Offset]; - fn(¶ms[i], v, verts[poly_idx[v]], verts[poly_idx[v + 1]], verts[poly_idx[v + 2]], render_buffer, area); + (this->*fn)(¶ms[i], v, verts[poly_idx[v]], verts[poly_idx[v + 1]], verts[poly_idx[v + 2]], render_buffer, area); } } } @@ -956,6 +961,8 @@ struct softrend : Renderer decoded_colors[2][c] = (REP_16((c >> 0) % 16) << 24) | (REP_16((c >> 12) % 16) << 16) | (REP_16((c >> 8) % 16) << 8) | (REP_16((c >> 4) % 16) << 0); } +#define Rendtriangle softrend::Rendtriangle + { RendtriangleFns[0][0][1][0][0][0] = &Rendtriangle<0, 0, 1, 0, 0, 0>; RendtriangleFns[0][0][1][0][0][1] = &Rendtriangle<0, 0, 1, 0, 0, 1>; @@ -1152,7 +1159,7 @@ struct softrend : Renderer RendtriangleFns[2][1][0][1][3][0] = &Rendtriangle<2, 1, 0, 1, 3, 0>; RendtriangleFns[2][1][0][1][3][1] = &Rendtriangle<2, 1, 0, 1, 3, 1>; } - +#undef Rendtriangle return true; } @@ -1246,8 +1253,8 @@ struct softrend : Renderer } }; -Renderer* rend_softrend() { - return new(_mm_malloc(sizeof(softrend), 32)) ::softrend(); +Renderer* rend_softrend(u8* vram) { + return new(_mm_malloc(sizeof(softrend), 32)) ::softrend(vram); } static auto softrend = RegisterRendererBackend(rendererbackend_t{ "soft", "Fast Software Renderer", 0, rend_softrend }); \ No newline at end of file diff --git a/libswirl/scripting/lua_bindings.cpp b/libswirl/scripting/lua_bindings.cpp index bdad7c7199..95f1b04c75 100644 --- a/libswirl/scripting/lua_bindings.cpp +++ b/libswirl/scripting/lua_bindings.cpp @@ -96,16 +96,12 @@ inline static void _write_main(u32 addr, T value) { template 
inline static T _read_sound(u32 addr) { - if (sizeof(T) == 1) return (T)arm_ReadMem8(addr); - else if (sizeof(T) == 2) return (T)arm_ReadMem16(addr); - else return (T)arm_ReadMem32(addr); + return (T)0; } template inline static void _write_sound(u32 addr, T value) { - if (sizeof(T) == 1) arm_WriteMem8(addr, (u8)value); - else if (sizeof(T) == 2) arm_WriteMem16(addr, (u16)value); - else arm_WriteMem32(addr, (u32)value); + } template diff --git a/libswirl/serialize.cpp b/libswirl/serialize.cpp index 4bae46ee5a..589468ac45 100644 --- a/libswirl/serialize.cpp +++ b/libswirl/serialize.cpp @@ -1,9 +1,10 @@ // serialize.cpp : save states #if 1 #include "types.h" -#include "hw/aica/dsp.h" +#include "hw/aica/dsp_backend.h" #include "hw/aica/aica.h" #include "hw/aica/sgc_if.h" +#include "hw/arm7/arm7.h" #include "hw/sh4/sh4_mem_area0.h" #include "hw/flashrom/flashrom.h" #include "hw/mem/_vmem.h" @@ -39,28 +40,12 @@ enum serialize_version_enum { } ; //gdrom -void gdrom_serialize(void** data, unsigned int* total_size); -bool gdrom_unserialize(void** data, unsigned int* total_size); //./core/hw/arm7/arm_mem.cpp -extern bool aica_interr; -extern u32 aica_reg_L; -extern bool e68k_out; -extern u32 e68k_reg_L; -extern u32 e68k_reg_M; - //./core/hw/arm7/arm7.cpp -extern DECL_ALIGN(8) reg_pair arm_Reg[RN_ARM_REG_COUNT]; -extern bool armIrqEnable; -extern bool armFiqEnable; -extern int armMode; -extern bool Arm7Enabled; -extern u8 cpuBitsSet[256]; -extern bool intState ; -extern bool stopState ; -extern bool holdState ; + /* if AREC dynarec enabled: vector ops; @@ -83,7 +68,7 @@ extern bool holdState ; //./core/hw/aica/dsp.o -extern DECL_ALIGN(4096) dsp_t dsp; + //recheck dsp.cpp if FEAT_DSPREC == DYNAREC_JIT @@ -100,22 +85,12 @@ extern DECL_ALIGN(4096) dsp_t dsp; //extern InterruptInfo* SCIPD; //extern InterruptInfo* SCIRE; -extern AicaTimer timers[3]; - - //./core/hw/aica/aica_if.o -extern VLockedMemory aica_ram; -extern u32 VREG;//video reg =P -extern u32 ARMRST;//arm reset reg 
-extern u32 rtc_EN; -//extern s32 aica_pending_dma ; -extern int dma_sched_id; +//extern s32 aica_pending_dma ; //./core/hw/aica/aica_mem.o -extern u8 aica_reg[0x8000]; - //./core/hw/aica/sgc_if.o @@ -348,7 +323,6 @@ extern DECL_ALIGN(4) u32 SFaceOffsColor; //extern vector VramLocks[/*VRAM_SIZE*/(16*1024*1024)/PAGE_SIZE]; //maybe - probably not - just a locking mechanism //extern cMutex vramlist_lock; -extern VLockedMemory vram; @@ -416,8 +390,6 @@ extern vector sch_list; //./core/hw/sh4/interpr/sh4_interpreter.o extern int aica_schid; -extern int rtc_schid; - @@ -767,37 +739,14 @@ bool dc_serialize(void **data, unsigned int *total_size) return false ; REICAST_S(version) ; - REICAST_S(aica_interr) ; - REICAST_S(aica_reg_L) ; - REICAST_S(e68k_out) ; - REICAST_S(e68k_reg_L) ; - REICAST_S(e68k_reg_M) ; - - REICAST_SA(arm_Reg,RN_ARM_REG_COUNT); - REICAST_S(armIrqEnable); - REICAST_S(armFiqEnable); - REICAST_S(armMode); - REICAST_S(Arm7Enabled); - REICAST_SA(cpuBitsSet,256); - REICAST_S(intState); - REICAST_S(stopState); - REICAST_S(holdState); - - REICAST_S(dsp); - - for ( i = 0 ; i < 3 ; i++) - { - REICAST_S(timers[i].c_step); - REICAST_S(timers[i].m_step); + + for (int i = 0; i < A0H_MAX; i++) { + sh4_cpu->GetA0Handler((Area0Hanlders)i)->serialize(data, total_size); } + REICAST_S(dsp); - REICAST_SA(aica_ram.data,aica_ram.size) ; - REICAST_S(VREG); - REICAST_S(ARMRST); - REICAST_S(rtc_EN); - - REICAST_SA(aica_reg,0x8000); + REICAST_SA(sh4_cpu->aica_ram.data, sh4_cpu->aica_ram.size); @@ -840,8 +789,6 @@ bool dc_serialize(void **data, unsigned int *total_size) REICAST_SA(reply_11,16) ; - gdrom_serialize(data, total_size); - REICAST_SA(EEPROM,0x100); REICAST_S(EEPROM_loaded); @@ -890,7 +837,7 @@ bool dc_serialize(void **data, unsigned int *total_size) REICAST_S(SFaceBaseColor); REICAST_S(SFaceOffsColor); - REICAST_SA(vram.data, vram.size); + REICAST_SA(sh4_cpu->vram.data, sh4_cpu->vram.size); REICAST_SA(OnChipRAM.data,OnChipRAM_SIZE); @@ -954,18 +901,10 @@ bool 
dc_serialize(void **data, unsigned int *total_size) REICAST_S(sch_list[aica_schid].start) ; REICAST_S(sch_list[aica_schid].end) ; - REICAST_S(sch_list[rtc_schid].tag) ; - REICAST_S(sch_list[rtc_schid].start) ; - REICAST_S(sch_list[rtc_schid].end) ; - REICAST_S(sch_list[maple_schid].tag) ; REICAST_S(sch_list[maple_schid].start) ; REICAST_S(sch_list[maple_schid].end) ; - REICAST_S(sch_list[dma_sched_id].tag) ; - REICAST_S(sch_list[dma_sched_id].start) ; - REICAST_S(sch_list[dma_sched_id].end) ; - for (int i = 0; i < 3; i++) { REICAST_S(sch_list[tmu_sched[i]].tag) ; @@ -1082,39 +1021,13 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size) int i = 0; int j = 0; - REICAST_US(aica_interr) ; - REICAST_US(aica_reg_L) ; - REICAST_US(e68k_out) ; - REICAST_US(e68k_reg_L) ; - REICAST_US(e68k_reg_M) ; - - REICAST_USA(arm_Reg,RN_ARM_REG_COUNT); - REICAST_US(armIrqEnable); - REICAST_US(armFiqEnable); - REICAST_US(armMode); - REICAST_US(Arm7Enabled); - REICAST_USA(cpuBitsSet,256); - REICAST_US(intState); - REICAST_US(stopState); - REICAST_US(holdState); - - REICAST_US(dsp); - - for ( i = 0 ; i < 3 ; i++) - { - REICAST_US(timers[i].c_step); - REICAST_US(timers[i].m_step); + for (int i = 0; i < A0H_MAX; i++) { + sh4_cpu->GetA0Handler((Area0Hanlders)i)->unserialize(data, total_size); } + REICAST_US(dsp); - REICAST_USA(aica_ram.data,aica_ram.size) ; - REICAST_US(VREG); - REICAST_US(ARMRST); - REICAST_US(rtc_EN); - - REICAST_USA(aica_reg,0x8000); - - + REICAST_USA(sh4_cpu->aica_ram.data, sh4_cpu->aica_ram.size); REICAST_USA(volume_lut,16); REICAST_USA(tl_lut,256 + 768); @@ -1162,7 +1075,6 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size) REICAST_USA(reply_11,16); - gdrom_unserialize(data, total_size); REICAST_US(i); //LIBRETRO_S(GDROM_TICK); REICAST_USA(EEPROM,0x100); @@ -1234,7 +1146,7 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size) u32 buf[1024]; //u32 *ptr = detwiddle[i][j] ; REICAST_US(buf); 
//LIBRETRO_SA(ptr,1024); } - REICAST_USA(vram.data, vram.size); + REICAST_USA(sh4_cpu->vram.data, sh4_cpu->vram.size); REICAST_USA(OnChipRAM.data,OnChipRAM_SIZE); @@ -1292,18 +1204,10 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size) REICAST_US(sch_list[aica_schid].start) ; REICAST_US(sch_list[aica_schid].end) ; - REICAST_US(sch_list[rtc_schid].tag) ; - REICAST_US(sch_list[rtc_schid].start) ; - REICAST_US(sch_list[rtc_schid].end) ; - REICAST_US(sch_list[maple_schid].tag) ; REICAST_US(sch_list[maple_schid].start) ; REICAST_US(sch_list[maple_schid].end) ; - REICAST_US(sch_list[dma_sched_id].tag) ; - REICAST_US(sch_list[dma_sched_id].start) ; - REICAST_US(sch_list[dma_sched_id].end) ; - for (int i = 0; i < 3; i++) { REICAST_US(sch_list[tmu_sched[i]].tag) ; @@ -1440,37 +1344,15 @@ bool dc_unserialize(void **data, unsigned int *total_size) fprintf(stderr, "Save State version not supported: %d\n", version); return false; } - REICAST_US(aica_interr) ; - REICAST_US(aica_reg_L) ; - REICAST_US(e68k_out) ; - REICAST_US(e68k_reg_L) ; - REICAST_US(e68k_reg_M) ; - - REICAST_USA(arm_Reg,RN_ARM_REG_COUNT); - REICAST_US(armIrqEnable); - REICAST_US(armFiqEnable); - REICAST_US(armMode); - REICAST_US(Arm7Enabled); - REICAST_USA(cpuBitsSet,256); - REICAST_US(intState); - REICAST_US(stopState); - REICAST_US(holdState); - - REICAST_US(dsp); - for ( i = 0 ; i < 3 ; i++) - { - REICAST_US(timers[i].c_step); - REICAST_US(timers[i].m_step); + for (int i = 0; i < A0H_MAX; i++) { + sh4_cpu->GetA0Handler((Area0Hanlders)i)->unserialize(data, total_size); } - REICAST_USA(aica_ram.data,aica_ram.size) ; - REICAST_US(VREG); - REICAST_US(ARMRST); - REICAST_US(rtc_EN); - - REICAST_USA(aica_reg,0x8000); + REICAST_US(dsp); + REICAST_USA(sh4_cpu->aica_ram.data, sh4_cpu->aica_ram.size); + REICAST_USA(volume_lut,16); REICAST_USA(tl_lut,256 + 768); REICAST_USA(AEG_ATT_SPS,64); @@ -1508,10 +1390,6 @@ bool dc_unserialize(void **data, unsigned int *total_size) 
REICAST_USA(reply_11,16) ; - gdrom_unserialize(data, total_size); - - - REICAST_USA(EEPROM,0x100); REICAST_US(EEPROM_loaded); @@ -1561,7 +1439,7 @@ bool dc_unserialize(void **data, unsigned int *total_size) pal_needs_update = true; - REICAST_USA(vram.data, vram.size); + REICAST_USA(sh4_cpu->vram.data, sh4_cpu->vram.size); REICAST_USA(OnChipRAM.data,OnChipRAM_SIZE); @@ -1622,18 +1500,10 @@ bool dc_unserialize(void **data, unsigned int *total_size) REICAST_US(sch_list[aica_schid].start) ; REICAST_US(sch_list[aica_schid].end) ; - REICAST_US(sch_list[rtc_schid].tag) ; - REICAST_US(sch_list[rtc_schid].start) ; - REICAST_US(sch_list[rtc_schid].end) ; - REICAST_US(sch_list[maple_schid].tag) ; REICAST_US(sch_list[maple_schid].start) ; REICAST_US(sch_list[maple_schid].end) ; - REICAST_US(sch_list[dma_sched_id].tag) ; - REICAST_US(sch_list[dma_sched_id].start) ; - REICAST_US(sch_list[dma_sched_id].end) ; - for (int i = 0; i < 3; i++) { REICAST_US(sch_list[tmu_sched[i]].tag) ; diff --git a/libswirl/types.h b/libswirl/types.h index 5f55953629..ad4e01e85f 100644 --- a/libswirl/types.h +++ b/libswirl/types.h @@ -267,7 +267,6 @@ enum DriveEvent //****************************************************** //************************ AICA ************************ //****************************************************** -void libARM_InterruptChange(u32 bits,u32 L); void libCore_CDDA_Sector(s16* sector); @@ -434,90 +433,6 @@ bool dc_unserialize(void **data, unsigned int *total_size); #define REICAST_SA(v_arr,num) rc_serialize(v_arr, sizeof(v_arr[0])*num, data, total_size) #define REICAST_USA(v_arr,num) rc_unserialize(v_arr, sizeof(v_arr[0])*num, data, total_size) -enum -{ - RN_CPSR = 16, - RN_SPSR = 17, - - R13_IRQ = 18, - R14_IRQ = 19, - SPSR_IRQ = 20, - R13_USR = 26, - R14_USR = 27, - R13_SVC = 28, - R14_SVC = 29, - SPSR_SVC = 30, - R13_ABT = 31, - R14_ABT = 32, - SPSR_ABT = 33, - R13_UND = 34, - R14_UND = 35, - SPSR_UND = 36, - R8_FIQ = 37, - R9_FIQ = 38, - R10_FIQ = 39, - R11_FIQ = 
40, - R12_FIQ = 41, - R13_FIQ = 42, - R14_FIQ = 43, - SPSR_FIQ = 44, - RN_PSR_FLAGS = 45, - R15_ARM_NEXT = 46, - INTR_PEND = 47, - CYCL_CNT = 48, - - RN_ARM_REG_COUNT, -}; - -typedef union -{ - struct - { - u8 B0; - u8 B1; - u8 B2; - u8 B3; - } B; - - struct - { - u16 W0; - u16 W1; - } W; - - union - { - struct - { - u32 _pad0 : 28; - u32 V : 1; //Bit 28 - u32 C : 1; //Bit 29 - u32 Z : 1; //Bit 30 - u32 N : 1; //Bit 31 - }; - - struct - { - u32 _pad1 : 28; - u32 NZCV : 4; //Bits [31:28] - }; - } FLG; - - struct - { - u32 M : 5; //mode, PSR[4:0] - u32 _pad0 : 1; //not used / zero - u32 F : 1; //FIQ disable, PSR[6] - u32 I : 1; //IRQ disable, PSR[7] - u32 _pad1 : 20; //not used / zero - u32 NZCV : 4; //Bits [31:28] - } PSR; - - u32 I; -} reg_pair; - - - #if COMPILER_VC_OR_CLANG_WIN32 #pragma warning( disable : 4127 4996 /*4244*/) @@ -582,8 +497,11 @@ struct settings_t bool safemode; bool disable_nvmem; SmcCheckEnum SmcCheckLevel; + int ScpuEnable; + int DspEnable; } dynarec; + struct { u32 run_counts; @@ -685,7 +603,7 @@ extern settings_t settings; void InitSettings(); void LoadSettings(bool game_specific); void SaveSettings(); -u32 GetRTC_now(); + extern u32 patchRB; inline bool is_s8(u32 v) { return (s8)v==(s32)v; } @@ -733,34 +651,10 @@ void plugins_Term(); void plugins_Reset(bool Manual); //PVR -struct PowerVR { - virtual s32 Init() = 0; - virtual void Reset(bool Manual) = 0; - virtual void Term() = 0; - - virtual ~PowerVR() { } - - static PowerVR* Create(); -}; void* libPvr_GetRenderTarget(); void* libPvr_GetRenderSurface(); -//AICA -struct AICA { - virtual s32 Init() = 0; - virtual void Reset(bool Manual) = 0; - virtual void Term() = 0; - - virtual u32 ReadReg(u32 addr, u32 size) = 0; - virtual void WriteReg(u32 addr, u32 data, u32 size) = 0; - - virtual void Update(u32 cycles) = 0; //called every ~1800 cycles, set to 0 if not used - virtual ~AICA() { } - - static AICA* Create(); -}; - //GDR void libCore_gdrom_disc_change(); @@ -797,17 +691,7 @@ static u32 
libExtDevice_ReadMem_A5(u32 addr,u32 size){ return 0; } static void libExtDevice_WriteMem_A5(u32 addr,u32 data,u32 size) { } //ARM -struct SoundCPU { - virtual s32 Init() = 0; - virtual void Reset(bool M) = 0; - virtual void Term() = 0; - virtual void SetResetState(u32 State) = 0; - virtual void Update(u32 cycles) = 0; - virtual ~SoundCPU() { } - - static SoundCPU* Create(); -}; #define ReadMemArrRet(arr,addr,sz) \ {if (sz==1) \ diff --git a/reicast/android-studio/reicast/src/main/jni/Android.mk b/reicast/android-studio/reicast/src/main/jni/Android.mk index 0d148df2cc..36376b421f 100644 --- a/reicast/android-studio/reicast/src/main/jni/Android.mk +++ b/reicast/android-studio/reicast/src/main/jni/Android.mk @@ -76,15 +76,17 @@ endif LOCAL_CFLAGS += ifeq ($(TARGET_ARCH_ABI),x86) - LOCAL_CFLAGS+= -DTARGET_NO_AREC -DTARGET_NO_OPENMP + LOCAL_CFLAGS+= -DTARGET_NO_AREC endif +LOCAL_CFLAGS += -DTARGET_NO_OPENMP + LOCAL_PRELINK_MODULE := false LOCAL_MODULE := dc LOCAL_DISABLE_FORMAT_STRING_CHECKS=true LOCAL_ASFLAGS += -fPIC -fvisibility=hidden -LOCAL_LDLIBS += -llog -lEGL -lz -landroid -fopenmp +LOCAL_LDLIBS += -llog -lEGL -lz -landroid #-Wl,-Map,./res/raw/syms.mp3 LOCAL_ARM_MODE := arm diff --git a/reicast/android-studio/reicast/src/main/jni/Application.mk b/reicast/android-studio/reicast/src/main/jni/Application.mk index 374fb82d06..4d736d6f1b 100644 --- a/reicast/android-studio/reicast/src/main/jni/Application.mk +++ b/reicast/android-studio/reicast/src/main/jni/Application.mk @@ -1,6 +1,6 @@ -APP_CPPFLAGS := -frtti -fexceptions -std=c++17 -fopenmp +APP_CPPFLAGS := -frtti -fexceptions -std=c++17 APP_STL := c++_static -APP_ABI := armeabi-v7a arm64-v8a #x86 mips +APP_ABI := armeabi-v7a arm64-v8a #x86 mips` NDK_TOOLCHAIN_VERSION := clang APP_SHORT_COMMANDS := true #NAOMI := 1 \ No newline at end of file diff --git a/reicast/cmake/config.cmake b/reicast/cmake/config.cmake index dbff0b9fbc..2462879733 100644 --- a/reicast/cmake/config.cmake +++ b/reicast/cmake/config.cmake @@ 
-181,21 +181,32 @@ endif() -## Dynarec avail on x86,x64,arm and aarch64 in arm.32 compat -# +## Main CPU Dynarec if((${HOST_CPU} EQUAL ${CPU_X86}) OR (${HOST_CPU} EQUAL ${CPU_X64}) OR (${HOST_CPU} EQUAL ${CPU_ARM}) OR (${HOST_CPU} EQUAL ${CPU_A64})) -# - message("Dynarec Features Available") - + message("SH4 Dynarec Features Available") set(FEAT_SHREC ${DYNAREC_JIT}) - set(FEAT_AREC ${DYNAREC_NONE}) - set(FEAT_DSPREC ${DYNAREC_NONE}) -# else() -message("Dynarec Features Missing") + message("SH4 Dynarec Features Missing") set(FEAT_SHREC ${DYNAREC_CPP}) - set(FEAT_AREC ${DYNAREC_NONE}) +endif() + +# Sound CPU dynarec +if((${HOST_CPU} EQUAL ${CPU_X86}) OR (${HOST_CPU} EQUAL ${CPU_A64})) + message("ARM7 Dynarec Features Available") + set(FEAT_AREC ${DYNAREC_JIT}) +else() + message("ARM7 Dynarec Features Missing") + set(FEAT_AREC ${DYNAREC_NONE}) +endif() + +# Sound DSP dynarec +if((${HOST_CPU} EQUAL ${CPU_X86}) OR (${HOST_CPU} EQUAL ${CPU_A64})) + message("DSP Dynarec Features Available") + set(FEAT_DSPREC ${DYNAREC_JIT}) +# +else() + message("DSP Dynarec Features Missing") set(FEAT_DSPREC ${DYNAREC_NONE}) endif() @@ -208,7 +219,6 @@ if(TARGET_NO_REC) endif() if(TARGET_NO_AREC) - set(FEAT_SHREC ${DYNAREC_JIT}) set(FEAT_AREC ${DYNAREC_NONE}) set(FEAT_DSPREC ${DYNAREC_NONE}) endif() diff --git a/reicast/linux/configure-platform.make b/reicast/linux/configure-platform.make index f845230aa5..00596c53d6 100644 --- a/reicast/linux/configure-platform.make +++ b/reicast/linux/configure-platform.make @@ -38,7 +38,7 @@ else ifneq (,$(findstring arm64,$(platform))) USE_X11 := 1 SUPPORT_GLX := 1 - CFLAGS += -march=armv8-a -D TARGET_LINUX_ARMv8 -D TARGET_NO_AREC -fno-builtin-sqrtf + CFLAGS += -march=armv8-a -D TARGET_LINUX_ARMv8 -fno-builtin-sqrtf CXXFLAGS += -fexceptions # Generic 32 bit ARMhf (a.k.a. ARMv7h)