diff --git a/ares/n64/rsp/interpreter.cpp b/ares/n64/rsp/interpreter.cpp index cb868882e0..db5994c91e 100644 --- a/ares/n64/rsp/interpreter.cpp +++ b/ares/n64/rsp/interpreter.cpp @@ -7,9 +7,9 @@ #define VT vpu.r[VTn] #define jp(id, name, ...) case id: return decoder##name(__VA_ARGS__) -#define op(id, name, ...) case id: return name(__VA_ARGS__) -#define br(id, name, ...) case id: return name(__VA_ARGS__) -#define vu(id, name, ...) case id: \ +#define op(id, flags, name, ...) case id: pipeline flags; return name(__VA_ARGS__) +#define br(id, flags, name, ...) case id: pipeline flags; return name(__VA_ARGS__) +#define vu(id, flags, name, ...) case id: pipeline flags; \ switch(E) { \ case 0x0: return name<0x0>(__VA_ARGS__); \ case 0x1: return name<0x1>(__VA_ARGS__); \ @@ -41,291 +41,296 @@ #define IMMu16 u16(OP) #define IMMu26 (OP & 0x03ff'ffff) +#define R .regRead +#define W .regWrite +#define L .load() +#define S .store() + auto RSP::decoderEXECUTE() -> void { switch(OP >> 26) { jp(0x00, SPECIAL); jp(0x01, REGIMM); - br(0x02, J, IMMu26); - br(0x03, JAL, IMMu26); - br(0x04, BEQ, RS, RT, IMMi16); - br(0x05, BNE, RS, RT, IMMi16); - br(0x06, BLEZ, RS, IMMi16); - br(0x07, BGTZ, RS, IMMi16); - op(0x08, ADDIU, RT, RS, IMMi16); //ADDI - op(0x09, ADDIU, RT, RS, IMMi16); - op(0x0a, SLTI, RT, RS, IMMi16); - op(0x0b, SLTIU, RT, RS, IMMi16); - op(0x0c, ANDI, RT, RS, IMMu16); - op(0x0d, ORI, RT, RS, IMMu16); - op(0x0e, XORI, RT, RS, IMMu16); - op(0x0f, LUI, RT, IMMu16); + br(0x02, , J, IMMu26); + br(0x03, , JAL, IMMu26); + br(0x04, R(RSn) R(RTn) , BEQ, RS, RT, IMMi16); + br(0x05, R(RSn) R(RTn) , BNE, RS, RT, IMMi16); + br(0x06, R(RSn) , BLEZ, RS, IMMi16); + br(0x07, R(RSn) , BGTZ, RS, IMMi16); + op(0x08, R(RSn) , ADDIU, RT, RS, IMMi16); //ADDI + op(0x09, R(RSn) , ADDIU, RT, RS, IMMi16); + op(0x0a, R(RSn) , SLTI, RT, RS, IMMi16); + op(0x0b, R(RSn) , SLTIU, RT, RS, IMMi16); + op(0x0c, R(RSn) , ANDI, RT, RS, IMMu16); + op(0x0d, R(RSn) , ORI, RT, RS, IMMu16); + op(0x0e, R(RSn) , XORI, 
RT, RS, IMMu16); + op(0x0f, , LUI, RT, IMMu16); jp(0x10, SCC); - op(0x11, INVALID); //COP1 + op(0x11, , INVALID); //COP1 jp(0x12, VU); - op(0x13, INVALID); //COP3 - op(0x14, INVALID); //BEQL - op(0x15, INVALID); //BNEL - op(0x16, INVALID); //BLEZL - op(0x17, INVALID); //BGTZL - op(0x18, INVALID); //DADDI - op(0x19, INVALID); //DADDIU - op(0x1a, INVALID); //LDL - op(0x1b, INVALID); //LDR - op(0x1c, INVALID); - op(0x1d, INVALID); - op(0x1e, INVALID); - op(0x1f, INVALID); - op(0x20, LB, RT, RS, IMMi16); - op(0x21, LH, RT, RS, IMMi16); - op(0x22, INVALID); //LWL - op(0x23, LW, RT, RS, IMMi16); - op(0x24, LBU, RT, RS, IMMi16); - op(0x25, LHU, RT, RS, IMMi16); - op(0x26, INVALID); //LWR - op(0x27, LWU, RT, RS, IMMi16); - op(0x28, SB, RT, RS, IMMi16); - op(0x29, SH, RT, RS, IMMi16); - op(0x2a, INVALID); //SWL - op(0x2b, SW, RT, RS, IMMi16); - op(0x2c, INVALID); //SDL - op(0x2d, INVALID); //SDR - op(0x2e, INVALID); //SWR - op(0x2f, INVALID); //CACHE - op(0x30, INVALID); //LL - op(0x31, INVALID); //LWC1 + op(0x13, , INVALID); //COP3 + op(0x14, , INVALID); //BEQL + op(0x15, , INVALID); //BNEL + op(0x16, , INVALID); //BLEZL + op(0x17, , INVALID); //BGTZL + op(0x18, , INVALID); //DADDI + op(0x19, , INVALID); //DADDIU + op(0x1a, , INVALID); //LDL + op(0x1b, , INVALID); //LDR + op(0x1c, , INVALID); + op(0x1d, , INVALID); + op(0x1e, , INVALID); + op(0x1f, , INVALID); + op(0x20, R(RSn) W(RTn) L, LB, RT, RS, IMMi16); + op(0x21, R(RSn) W(RTn) L, LH, RT, RS, IMMi16); + op(0x22, , INVALID); //LWL + op(0x23, R(RSn) W(RTn) L, LW, RT, RS, IMMi16); + op(0x24, R(RSn) W(RTn) L, LBU, RT, RS, IMMi16); + op(0x25, R(RSn) W(RTn) L, LHU, RT, RS, IMMi16); + op(0x26, , INVALID); //LWR + op(0x27, R(RSn) W(RTn) L, LWU, RT, RS, IMMi16); + op(0x28, R(RTn) R(RSn) S, SB, RT, RS, IMMi16); + op(0x29, R(RTn) R(RSn) S, SH, RT, RS, IMMi16); + op(0x2a, , INVALID); //SWL + op(0x2b, R(RTn) R(RSn) S, SW, RT, RS, IMMi16); + op(0x2c, , INVALID); //SDL + op(0x2d, , INVALID); //SDR + op(0x2e, , INVALID); //SWR + 
op(0x2f, , INVALID); //CACHE + op(0x30, , INVALID); //LL + op(0x31, , INVALID); //LWC1 jp(0x32, LWC2); - op(0x33, INVALID); //LWC3 - op(0x34, INVALID); //LLD - op(0x35, INVALID); //LDC1 - op(0x36, INVALID); //LDC2 - op(0x37, INVALID); //LD - op(0x38, INVALID); //SC - op(0x39, INVALID); //SWC1 + op(0x33, , INVALID); //LWC3 + op(0x34, , INVALID); //LLD + op(0x35, , INVALID); //LDC1 + op(0x36, , INVALID); //LDC2 + op(0x37, , INVALID); //LD + op(0x38, , INVALID); //SC + op(0x39, , INVALID); //SWC1 jp(0x3a, SWC2); - op(0x3b, INVALID); //SWC3 - op(0x3c, INVALID); //SCD - op(0x3d, INVALID); //SDC1 - op(0x3e, INVALID); //SDC2 - op(0x3f, INVALID); //SD + op(0x3b, , INVALID); //SWC3 + op(0x3c, , INVALID); //SCD + op(0x3d, , INVALID); //SDC1 + op(0x3e, , INVALID); //SDC2 + op(0x3f, , INVALID); //SD } } auto RSP::decoderSPECIAL() -> void { switch(OP & 0x3f) { - op(0x00, SLL, RD, RT, SA); - op(0x01, INVALID); - op(0x02, SRL, RD, RT, SA); - op(0x03, SRA, RD, RT, SA); - op(0x04, SLLV, RD, RT, RS); - op(0x05, INVALID); - op(0x06, SRLV, RD, RT, RS); - op(0x07, SRAV, RD, RT, RS); - br(0x08, JR, RS); - br(0x09, JALR, RD, RS); - op(0x0a, INVALID); - op(0x0b, INVALID); - op(0x0c, INVALID); //SYSCALL - br(0x0d, BREAK); - op(0x0e, INVALID); - op(0x0f, INVALID); //SYNC - op(0x10, INVALID); //MFHI - op(0x11, INVALID); //MTHI - op(0x12, INVALID); //MFLO - op(0x13, INVALID); //MTLO - op(0x14, INVALID); //DSLLV - op(0x15, INVALID); - op(0x16, INVALID); //DSRLV - op(0x17, INVALID); //DSRAV - op(0x18, INVALID); //MULT - op(0x19, INVALID); //MULTU - op(0x1a, INVALID); //DIV - op(0x1b, INVALID); //DIVU - op(0x1c, INVALID); //DMULT - op(0x1d, INVALID); //DMULTU - op(0x1e, INVALID); //DDIV - op(0x1f, INVALID); //DDIVU - op(0x20, ADDU, RD, RS, RT); //ADD - op(0x21, ADDU, RD, RS, RT); - op(0x22, SUBU, RD, RS, RT); //SUB - op(0x23, SUBU, RD, RS, RT); - op(0x24, AND, RD, RS, RT); - op(0x25, OR, RD, RS, RT); - op(0x26, XOR, RD, RS, RT); - op(0x27, NOR, RD, RS, RT); - op(0x28, INVALID); - op(0x29, 
INVALID); - op(0x2a, SLT, RD, RS, RT); - op(0x2b, SLTU, RD, RS, RT); - op(0x2c, INVALID); //DADD - op(0x2d, INVALID); //DADDU - op(0x2e, INVALID); //DSUB - op(0x2f, INVALID); //DSUBU - op(0x30, INVALID); //TGE - op(0x31, INVALID); //TGEU - op(0x32, INVALID); //TLT - op(0x33, INVALID); //TLTU - op(0x34, INVALID); //TEQ - op(0x35, INVALID); - op(0x36, INVALID); //TNE - op(0x37, INVALID); - op(0x38, INVALID); //DSLL - op(0x39, INVALID); - op(0x3a, INVALID); //DSRL - op(0x3b, INVALID); //DSRA - op(0x3c, INVALID); //DSLL32 - op(0x3d, INVALID); - op(0x3e, INVALID); //DSRL32 - op(0x3f, INVALID); //DSRA32 + op(0x00, R(RTn) , SLL, RD, RT, SA); + op(0x01, , INVALID); + op(0x02, R(RTn) , SRL, RD, RT, SA); + op(0x03, R(RTn) , SRA, RD, RT, SA); + op(0x04, R(RTn) R(RSn), SLLV, RD, RT, RS); + op(0x05, , INVALID); + op(0x06, R(RTn) R(RSn), SRLV, RD, RT, RS); + op(0x07, R(RTn) R(RSn), SRAV, RD, RT, RS); + br(0x08, R(RSn) , JR, RS); + br(0x09, R(RSn) , JALR, RD, RS); + op(0x0a, , INVALID); + op(0x0b, , INVALID); + op(0x0c, , INVALID); //SYSCALL + br(0x0d, , BREAK); + op(0x0e, , INVALID); + op(0x0f, , INVALID); //SYNC + op(0x10, , INVALID); //MFHI + op(0x11, , INVALID); //MTHI + op(0x12, , INVALID); //MFLO + op(0x13, , INVALID); //MTLO + op(0x14, , INVALID); //DSLLV + op(0x15, , INVALID); + op(0x16, , INVALID); //DSRLV + op(0x17, , INVALID); //DSRAV + op(0x18, , INVALID); //MULT + op(0x19, , INVALID); //MULTU + op(0x1a, , INVALID); //DIV + op(0x1b, , INVALID); //DIVU + op(0x1c, , INVALID); //DMULT + op(0x1d, , INVALID); //DMULTU + op(0x1e, , INVALID); //DDIV + op(0x1f, , INVALID); //DDIVU + op(0x20, R(RSn) R(RTn), ADDU, RD, RS, RT); //ADD + op(0x21, R(RSn) R(RTn), ADDU, RD, RS, RT); + op(0x22, R(RSn) R(RTn), SUBU, RD, RS, RT); //SUB + op(0x23, R(RSn) R(RTn), SUBU, RD, RS, RT); + op(0x24, R(RSn) R(RTn), AND, RD, RS, RT); + op(0x25, R(RSn) R(RTn), OR, RD, RS, RT); + op(0x26, R(RSn) R(RTn), XOR, RD, RS, RT); + op(0x27, R(RSn) R(RTn), NOR, RD, RS, RT); + op(0x28, , INVALID); + op(0x29, , 
INVALID); + op(0x2a, R(RSn) R(RTn), SLT, RD, RS, RT); + op(0x2b, R(RSn) R(RTn), SLTU, RD, RS, RT); + op(0x2c, , INVALID); //DADD + op(0x2d, , INVALID); //DADDU + op(0x2e, , INVALID); //DSUB + op(0x2f, , INVALID); //DSUBU + op(0x30, , INVALID); //TGE + op(0x31, , INVALID); //TGEU + op(0x32, , INVALID); //TLT + op(0x33, , INVALID); //TLTU + op(0x34, , INVALID); //TEQ + op(0x35, , INVALID); + op(0x36, , INVALID); //TNE + op(0x37, , INVALID); + op(0x38, , INVALID); //DSLL + op(0x39, , INVALID); + op(0x3a, , INVALID); //DSRL + op(0x3b, , INVALID); //DSRA + op(0x3c, , INVALID); //DSLL32 + op(0x3d, , INVALID); + op(0x3e, , INVALID); //DSRL32 + op(0x3f, , INVALID); //DSRA32 } } auto RSP::decoderREGIMM() -> void { switch(OP >> 16 & 0x1f) { - br(0x00, BLTZ, RS, IMMi16); - br(0x01, BGEZ, RS, IMMi16); - op(0x02, INVALID); //BLTZL - op(0x03, INVALID); //BGEZL - op(0x04, INVALID); - op(0x05, INVALID); - op(0x06, INVALID); - op(0x07, INVALID); - op(0x08, INVALID); //TGEI - op(0x09, INVALID); //TGEIU - op(0x0a, INVALID); //TLTI - op(0x0b, INVALID); //TLTIU - op(0x0c, INVALID); //TEQI - op(0x0d, INVALID); - op(0x0e, INVALID); //TNEI - op(0x0f, INVALID); - br(0x10, BLTZAL, RS, IMMi16); - br(0x11, BGEZAL, RS, IMMi16); - op(0x12, INVALID); //BLTZALL - op(0x13, INVALID); //BGEZALL - op(0x14, INVALID); - op(0x15, INVALID); - op(0x16, INVALID); - op(0x17, INVALID); - op(0x18, INVALID); - op(0x19, INVALID); - op(0x1a, INVALID); - op(0x1b, INVALID); - op(0x1c, INVALID); - op(0x1d, INVALID); - op(0x1e, INVALID); - op(0x1f, INVALID); + br(0x00, R(RSn), BLTZ, RS, IMMi16); + br(0x01, R(RSn), BGEZ, RS, IMMi16); + op(0x02, , INVALID); //BLTZL + op(0x03, , INVALID); //BGEZL + op(0x04, , INVALID); + op(0x05, , INVALID); + op(0x06, , INVALID); + op(0x07, , INVALID); + op(0x08, , INVALID); //TGEI + op(0x09, , INVALID); //TGEIU + op(0x0a, , INVALID); //TLTI + op(0x0b, , INVALID); //TLTIU + op(0x0c, , INVALID); //TEQI + op(0x0d, , INVALID); + op(0x0e, , INVALID); //TNEI + op(0x0f, , INVALID); + 
br(0x10, R(RSn), BLTZAL, RS, IMMi16); + br(0x11, R(RSn), BGEZAL, RS, IMMi16); + op(0x12, , INVALID); //BLTZALL + op(0x13, , INVALID); //BGEZALL + op(0x14, , INVALID); + op(0x15, , INVALID); + op(0x16, , INVALID); + op(0x17, , INVALID); + op(0x18, , INVALID); + op(0x19, , INVALID); + op(0x1a, , INVALID); + op(0x1b, , INVALID); + op(0x1c, , INVALID); + op(0x1d, , INVALID); + op(0x1e, , INVALID); + op(0x1f, , INVALID); } } auto RSP::decoderSCC() -> void { switch(OP >> 21 & 0x1f) { - op(0x00, MFC0, RT, RDn); - op(0x01, INVALID); //DMFC0 - op(0x02, INVALID); //CFC0 - op(0x03, INVALID); - op(0x04, MTC0, RT, RDn); - op(0x05, INVALID); //DMTC0 - op(0x06, INVALID); //CTC0 - op(0x07, INVALID); - op(0x08, INVALID); //BC0 - op(0x09, INVALID); - op(0x0a, INVALID); - op(0x0b, INVALID); - op(0x0c, INVALID); - op(0x0d, INVALID); - op(0x0e, INVALID); - op(0x0f, INVALID); + op(0x00, W(RTn) L S, MFC0, RT, RDn); + op(0x01, , INVALID); //DMFC0 + op(0x02, , INVALID); //CFC0 + op(0x03, , INVALID); + op(0x04, R(RTn) L S, MTC0, RT, RDn); + op(0x05, , INVALID); //DMTC0 + op(0x06, , INVALID); //CTC0 + op(0x07, , INVALID); + op(0x08, , INVALID); //BC0 + op(0x09, , INVALID); + op(0x0a, , INVALID); + op(0x0b, , INVALID); + op(0x0c, , INVALID); + op(0x0d, , INVALID); + op(0x0e, , INVALID); + op(0x0f, , INVALID); } } auto RSP::decoderVU() -> void { #define E (OP >> 7 & 15) switch(OP >> 21 & 0x1f) { - vu(0x00, MFC2, RT, VS); - op(0x01, INVALID); //DMFC2 - op(0x02, CFC2, RT, RDn); - op(0x03, INVALID); - vu(0x04, MTC2, RT, VS); - op(0x05, INVALID); //DMTC2 - op(0x06, CTC2, RT, RDn); - op(0x07, INVALID); - op(0x08, INVALID); //BC2 - op(0x09, INVALID); - op(0x0a, INVALID); - op(0x0b, INVALID); - op(0x0c, INVALID); - op(0x0d, INVALID); - op(0x0e, INVALID); - op(0x0f, INVALID); + vu(0x00, W(RTn) L S, MFC2, RT, VS); + op(0x01, , INVALID); //DMFC2 + op(0x02, W(RTn) L S, CFC2, RT, RDn); + op(0x03, , INVALID); + vu(0x04, R(RTn) L S, MTC2, RT, VS); + op(0x05, , INVALID); //DMTC2 + op(0x06, R(RTn) L S, CTC2, 
RT, RDn); + op(0x07, , INVALID); + op(0x08, , INVALID); //BC2 + op(0x09, , INVALID); + op(0x0a, , INVALID); + op(0x0b, , INVALID); + op(0x0c, , INVALID); + op(0x0d, , INVALID); + op(0x0e, , INVALID); + op(0x0f, , INVALID); } #undef E #define E (OP >> 21 & 15) #define DE (OP >> 11 & 7) switch(OP & 0x3f) { - vu(0x00, VMULF, VD, VS, VT); - vu(0x01, VMULU, VD, VS, VT); - vu(0x02, VRNDP, VD, VSn, VT); - vu(0x03, VMULQ, VD, VS, VT); - vu(0x04, VMUDL, VD, VS, VT); - vu(0x05, VMUDM, VD, VS, VT); - vu(0x06, VMUDN, VD, VS, VT); - vu(0x07, VMUDH, VD, VS, VT); - vu(0x08, VMACF, VD, VS, VT); - vu(0x09, VMACU, VD, VS, VT); - vu(0x0a, VRNDN, VD, VSn, VT); - op(0x0b, VMACQ, VD); - vu(0x0c, VMADL, VD, VS, VT); - vu(0x0d, VMADM, VD, VS, VT); - vu(0x0e, VMADN, VD, VS, VT); - vu(0x0f, VMADH, VD, VS, VT); - vu(0x10, VADD, VD, VS, VT); - vu(0x11, VSUB, VD, VS, VT); - vu(0x12, VZERO, VD, VS, VT); //VSUT - vu(0x13, VABS, VD, VS, VT); - vu(0x14, VADDC, VD, VS, VT); - vu(0x15, VSUBC, VD, VS, VT); - vu(0x16, VZERO, VD, VS, VT); //VADDB - vu(0x17, VZERO, VD, VS, VT); //VSUBB - vu(0x18, VZERO, VD, VS, VT); //VACCB - vu(0x19, VZERO, VD, VS, VT); //VSUCB - vu(0x1a, VZERO, VD, VS, VT); //VSAD - vu(0x1b, VZERO, VD, VS, VT); //VSAC - vu(0x1c, VZERO, VD, VS, VT); //VSUM - vu(0x1d, VSAR, VD, VS); - vu(0x1e, VZERO, VD, VS, VT); - vu(0x1f, VZERO, VD, VS, VT); - vu(0x20, VLT, VD, VS, VT); - vu(0x21, VEQ, VD, VS, VT); - vu(0x22, VNE, VD, VS, VT); - vu(0x23, VGE, VD, VS, VT); - vu(0x24, VCL, VD, VS, VT); - vu(0x25, VCH, VD, VS, VT); - vu(0x26, VCR, VD, VS, VT); - vu(0x27, VMRG, VD, VS, VT); - vu(0x28, VAND, VD, VS, VT); - vu(0x29, VNAND, VD, VS, VT); - vu(0x2a, VOR, VD, VS, VT); - vu(0x2b, VNOR, VD, VS, VT); - vu(0x2c, VXOR, VD, VS, VT); - vu(0x2d, VNXOR, VD, VS, VT); - vu(0x2e, VZERO, VD, VS, VT); - vu(0x2f, VZERO, VD, VS, VT); - vu(0x30, VRCP, VD, DE, VT); - vu(0x31, VRCPL, VD, DE, VT); - vu(0x32, VRCPH, VD, DE, VT); - vu(0x33, VMOV, VD, DE, VT); - vu(0x34, VRSQ, VD, DE, VT); - vu(0x35, VRSQL, VD, DE, 
VT); - vu(0x36, VRSQH, VD, DE, VT); - op(0x37, VNOP); - vu(0x38, VZERO, VD, VS, VT); //VEXTT - vu(0x39, VZERO, VD, VS, VT); //VEXTQ - vu(0x3a, VZERO, VD, VS, VT); //VEXTN - vu(0x3b, VZERO, VD, VS, VT); - vu(0x3c, VZERO, VD, VS, VT); //VINST - vu(0x3d, VZERO, VD, VS, VT); //VINSQ - vu(0x3e, VZERO, VD, VS, VT); //VINSN - op(0x3f, VNOP); //VNULL + vu(0x00, , VMULF, VD, VS, VT); + vu(0x01, , VMULU, VD, VS, VT); + vu(0x02, , VRNDP, VD, VSn, VT); + vu(0x03, , VMULQ, VD, VS, VT); + vu(0x04, , VMUDL, VD, VS, VT); + vu(0x05, , VMUDM, VD, VS, VT); + vu(0x06, , VMUDN, VD, VS, VT); + vu(0x07, , VMUDH, VD, VS, VT); + vu(0x08, , VMACF, VD, VS, VT); + vu(0x09, , VMACU, VD, VS, VT); + vu(0x0a, , VRNDN, VD, VSn, VT); + op(0x0b, , VMACQ, VD); + vu(0x0c, , VMADL, VD, VS, VT); + vu(0x0d, , VMADM, VD, VS, VT); + vu(0x0e, , VMADN, VD, VS, VT); + vu(0x0f, , VMADH, VD, VS, VT); + vu(0x10, , VADD, VD, VS, VT); + vu(0x11, , VSUB, VD, VS, VT); + vu(0x12, , VZERO, VD, VS, VT); //VSUT + vu(0x13, , VABS, VD, VS, VT); + vu(0x14, , VADDC, VD, VS, VT); + vu(0x15, , VSUBC, VD, VS, VT); + vu(0x16, , VZERO, VD, VS, VT); //VADDB + vu(0x17, , VZERO, VD, VS, VT); //VSUBB + vu(0x18, , VZERO, VD, VS, VT); //VACCB + vu(0x19, , VZERO, VD, VS, VT); //VSUCB + vu(0x1a, , VZERO, VD, VS, VT); //VSAD + vu(0x1b, , VZERO, VD, VS, VT); //VSAC + vu(0x1c, , VZERO, VD, VS, VT); //VSUM + vu(0x1d, , VSAR, VD, VS); + vu(0x1e, , VZERO, VD, VS, VT); + vu(0x1f, , VZERO, VD, VS, VT); + vu(0x20, , VLT, VD, VS, VT); + vu(0x21, , VEQ, VD, VS, VT); + vu(0x22, , VNE, VD, VS, VT); + vu(0x23, , VGE, VD, VS, VT); + vu(0x24, , VCL, VD, VS, VT); + vu(0x25, , VCH, VD, VS, VT); + vu(0x26, , VCR, VD, VS, VT); + vu(0x27, , VMRG, VD, VS, VT); + vu(0x28, , VAND, VD, VS, VT); + vu(0x29, , VNAND, VD, VS, VT); + vu(0x2a, , VOR, VD, VS, VT); + vu(0x2b, , VNOR, VD, VS, VT); + vu(0x2c, , VXOR, VD, VS, VT); + vu(0x2d, , VNXOR, VD, VS, VT); + vu(0x2e, , VZERO, VD, VS, VT); + vu(0x2f, , VZERO, VD, VS, VT); + vu(0x30, , VRCP, VD, DE, VT); + vu(0x31, , 
VRCPL, VD, DE, VT); + vu(0x32, , VRCPH, VD, DE, VT); + vu(0x33, , VMOV, VD, DE, VT); + vu(0x34, , VRSQ, VD, DE, VT); + vu(0x35, , VRSQL, VD, DE, VT); + vu(0x36, , VRSQH, VD, DE, VT); + op(0x37, , VNOP); + vu(0x38, , VZERO, VD, VS, VT); //VEXTT + vu(0x39, , VZERO, VD, VS, VT); //VEXTQ + vu(0x3a, , VZERO, VD, VS, VT); //VEXTN + vu(0x3b, , VZERO, VD, VS, VT); + vu(0x3c, , VZERO, VD, VS, VT); //VINST + vu(0x3d, , VZERO, VD, VS, VT); //VINSQ + vu(0x3e, , VZERO, VD, VS, VT); //VINSN + op(0x3f, , VNOP); //VNULL } #undef E #undef DE @@ -335,18 +340,18 @@ auto RSP::decoderLWC2() -> void { #define E (OP >> 7 & 15) #define IMMi7 i7(OP) switch(OP >> 11 & 0x1f) { - vu(0x00, LBV, VT, RS, IMMi7); - vu(0x01, LSV, VT, RS, IMMi7); - vu(0x02, LLV, VT, RS, IMMi7); - vu(0x03, LDV, VT, RS, IMMi7); - vu(0x04, LQV, VT, RS, IMMi7); - vu(0x05, LRV, VT, RS, IMMi7); - vu(0x06, LPV, VT, RS, IMMi7); - vu(0x07, LUV, VT, RS, IMMi7); - vu(0x08, LHV, VT, RS, IMMi7); - vu(0x09, LFV, VT, RS, IMMi7); -//vu(0x0a, LWV, VT, RS, IMMi7); //not present on N64 RSP - vu(0x0b, LTV, VTn, RS, IMMi7); + vu(0x00, R(RSn) L, LBV, VT, RS, IMMi7); + vu(0x01, R(RSn) L, LSV, VT, RS, IMMi7); + vu(0x02, R(RSn) L, LLV, VT, RS, IMMi7); + vu(0x03, R(RSn) L, LDV, VT, RS, IMMi7); + vu(0x04, R(RSn) L, LQV, VT, RS, IMMi7); + vu(0x05, R(RSn) L, LRV, VT, RS, IMMi7); + vu(0x06, R(RSn) L, LPV, VT, RS, IMMi7); + vu(0x07, R(RSn) L, LUV, VT, RS, IMMi7); + vu(0x08, R(RSn) L, LHV, VT, RS, IMMi7); + vu(0x09, R(RSn) L, LFV, VT, RS, IMMi7); +//vu(0x0a, R(RSn) L, LWV, VT, RS, IMMi7); //not present on N64 RSP + vu(0x0b, R(RSn) L, LTV, VTn, RS, IMMi7); } #undef E #undef IMMi7 @@ -356,18 +361,18 @@ auto RSP::decoderSWC2() -> void { #define E (OP >> 7 & 15) #define IMMi7 i7(OP) switch(OP >> 11 & 0x1f) { - vu(0x00, SBV, VT, RS, IMMi7); - vu(0x01, SSV, VT, RS, IMMi7); - vu(0x02, SLV, VT, RS, IMMi7); - vu(0x03, SDV, VT, RS, IMMi7); - vu(0x04, SQV, VT, RS, IMMi7); - vu(0x05, SRV, VT, RS, IMMi7); - vu(0x06, SPV, VT, RS, IMMi7); - vu(0x07, SUV, VT, 
RS, IMMi7); - vu(0x08, SHV, VT, RS, IMMi7); - vu(0x09, SFV, VT, RS, IMMi7); - vu(0x0a, SWV, VT, RS, IMMi7); - vu(0x0b, STV, VTn, RS, IMMi7); + vu(0x00, R(RSn) S, SBV, VT, RS, IMMi7); + vu(0x01, R(RSn) S, SSV, VT, RS, IMMi7); + vu(0x02, R(RSn) S, SLV, VT, RS, IMMi7); + vu(0x03, R(RSn) S, SDV, VT, RS, IMMi7); + vu(0x04, R(RSn) S, SQV, VT, RS, IMMi7); + vu(0x05, R(RSn) S, SRV, VT, RS, IMMi7); + vu(0x06, R(RSn) S, SPV, VT, RS, IMMi7); + vu(0x07, R(RSn) S, SUV, VT, RS, IMMi7); + vu(0x08, R(RSn) S, SHV, VT, RS, IMMi7); + vu(0x09, R(RSn) S, SFV, VT, RS, IMMi7); + vu(0x0a, R(RSn) S, SWV, VT, RS, IMMi7); + vu(0x0b, R(RSn) S, STV, VTn, RS, IMMi7); } #undef E #undef IMMi7 @@ -376,6 +381,11 @@ auto RSP::decoderSWC2() -> void { auto RSP::INVALID() -> void { } +#undef L +#undef S +#undef R +#undef W + #undef SA #undef RDn #undef RTn diff --git a/ares/n64/rsp/recompiler.cpp b/ares/n64/rsp/recompiler.cpp index e10a886f8a..274ef4eb59 100644 --- a/ares/n64/rsp/recompiler.cpp +++ b/ares/n64/rsp/recompiler.cpp @@ -38,6 +38,7 @@ auto RSP::Recompiler::block(u12 address) -> Block* { auto size = measure(address); auto hashcode = hash(address, size); + hashcode ^= self.pipeline.hash(); BlockHashPair pair; pair.hashcode = hashcode; @@ -66,14 +67,19 @@ auto RSP::Recompiler::emit(u12 address) -> Block* { reset(); } + pipeline = self.pipeline; + auto block = (Block*)allocator.acquire(sizeof(Block)); beginFunction(3); u12 start = address; bool hasBranched = 0; while(true) { + pipeline.begin(); u32 instruction = self.imem.read(address); bool branched = emitEXECUTE(instruction); + pipeline.end(); + mov32(reg(1), imm(pipeline.clocks)); call(&RSP::instructionEpilogue); address += 4; if(hasBranched || address == start) break; @@ -82,9 +88,13 @@ auto RSP::Recompiler::emit(u12 address) -> Block* { } jumpEpilog(); + //reset clocks to zero every time block is executed + pipeline.clocks = 0; + memory::jitprotect(false); block->code = endFunction(); block->size = address - start; + block->pipeline = 
pipeline; //print(hex(PC, 8L), " ", instructions, " ", size(), "\n"); return block; @@ -155,6 +165,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //BEQ Rs,Rt,i16 case 0x04: { + pipeline.regRead(Rsn).regRead(Rtn); lea(reg(1), Rs); lea(reg(2), Rt); mov32(reg(3), imm(i16)); @@ -164,6 +175,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //BNE Rs,Rt,i16 case 0x05: { + pipeline.regRead(Rsn).regRead(Rtn); lea(reg(1), Rs); lea(reg(2), Rt); mov32(reg(3), imm(i16)); @@ -173,6 +185,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //BLEZ Rs,i16 case 0x06: { + pipeline.regRead(Rsn); lea(reg(1), Rs); mov32(reg(2), imm(i16)); call(&RSP::BLEZ); @@ -181,6 +194,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //BGTZ Rs,i16 case 0x07: { + pipeline.regRead(Rsn); lea(reg(1), Rs); mov32(reg(2), imm(i16)); call(&RSP::BGTZ); @@ -189,12 +203,14 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //ADDIU Rt,Rs,i16 case range2(0x08, 0x09): { + pipeline.regRead(Rsn); add32(mem(Rt), mem(Rs), imm(i16)); return 0; } //SLTI Rt,Rs,i16 case 0x0a: { + pipeline.regRead(Rsn); cmp32(mem(Rs), imm(i16), set_slt); mov32_f(mem(Rt), flag_slt); return 0; @@ -202,6 +218,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //SLTIU Rt,Rs,i16 case 0x0b: { + pipeline.regRead(Rsn); cmp32(mem(Rs), imm(i16), set_ult); mov32_f(mem(Rt), flag_ult); return 0; @@ -209,18 +226,21 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //ANDI Rt,Rs,n16 case 0x0c: { + pipeline.regRead(Rsn); and32(mem(Rt), mem(Rs), imm(n16)); return 0; } //ORI Rt,Rs,n16 case 0x0d: { + pipeline.regRead(Rsn); or32(mem(Rt), mem(Rs), imm(n16)); return 0; } //XORI Rt,Rs,n16 case 0x0e: { + pipeline.regRead(Rsn); xor32(mem(Rt), mem(Rs), imm(n16)); return 0; } @@ -253,6 +273,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //LB Rt,Rs,i16 case 0x20: { + pipeline.regRead(Rsn).regWrite(Rtn).load(); lea(reg(1), Rt); lea(reg(2), Rs); 
mov32(reg(3), imm(i16)); @@ -262,6 +283,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //LH Rt,Rs,i16 case 0x21: { + pipeline.regRead(Rsn).regWrite(Rtn).load(); lea(reg(1), Rt); lea(reg(2), Rs); mov32(reg(3), imm(i16)); @@ -276,6 +298,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //LW Rt,Rs,i16 case 0x23: { + pipeline.regRead(Rsn).regWrite(Rtn).load(); lea(reg(1), Rt); lea(reg(2), Rs); mov32(reg(3), imm(i16)); @@ -285,6 +308,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //LBU Rt,Rs,i16 case 0x24: { + pipeline.regRead(Rsn).regWrite(Rtn).load(); lea(reg(1), Rt); lea(reg(2), Rs); mov32(reg(3), imm(i16)); @@ -294,6 +318,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //LHU Rt,Rs,i16 case 0x25: { + pipeline.regRead(Rsn).regWrite(Rtn).load(); lea(reg(1), Rt); lea(reg(2), Rs); mov32(reg(3), imm(i16)); @@ -308,6 +333,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //LWU Rt,Rs,i16 case 0x27: { + pipeline.regRead(Rsn).regWrite(Rtn).load(); lea(reg(1), Rt); lea(reg(2), Rs); mov32(reg(3), imm(i16)); @@ -317,6 +343,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //SB Rt,Rs,i16 case 0x28: { + pipeline.regRead(Rsn).regRead(Rtn).store(); lea(reg(1), Rt); lea(reg(2), Rs); mov32(reg(3), imm(i16)); @@ -326,6 +353,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //SH Rt,Rs,i16 case 0x29: { + pipeline.regRead(Rsn).regRead(Rtn).store(); lea(reg(1), Rt); lea(reg(2), Rs); mov32(reg(3), imm(i16)); @@ -340,6 +368,7 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool { //SW Rt,Rs,i16 case 0x2b: { + pipeline.regRead(Rsn).regRead(Rtn).store(); lea(reg(1), Rt); lea(reg(2), Rs); mov32(reg(3), imm(i16)); @@ -382,6 +411,7 @@ auto RSP::Recompiler::emitSPECIAL(u32 instruction) -> bool { //SLL Rd,Rt,Sa case 0x00: { + pipeline.regRead(Rtn); shl32(mem(Rd), mem(Rt), imm(Sa)); return 0; } @@ -393,18 +423,21 @@ auto RSP::Recompiler::emitSPECIAL(u32 instruction) -> 
bool { //SRL Rd,Rt,Sa case 0x02: { + pipeline.regRead(Rtn); lshr32(mem(Rd), mem(Rt), imm(Sa)); return 0; } //SRA Rd,Rt,Sa case 0x03: { + pipeline.regRead(Rtn); ashr32(mem(Rd), mem(Rt), imm(Sa)); return 0; } //SLLV Rd,Rt,Rs case 0x04: { + pipeline.regRead(Rsn).regRead(Rtn); mshl32(mem(Rd), mem(Rt), mem(Rs)); return 0; } @@ -416,18 +449,21 @@ auto RSP::Recompiler::emitSPECIAL(u32 instruction) -> bool { //SRLV Rd,Rt,Rs case 0x06: { + pipeline.regRead(Rsn).regRead(Rtn); mlshr32(mem(Rd), mem(Rt), mem(Rs)); return 0; } //SRAV Rd,Rt,Rs case 0x07: { + pipeline.regRead(Rsn).regRead(Rtn); mashr32(mem(Rd), mem(Rt), mem(Rs)); return 0; } //JR Rs case 0x08: { + pipeline.regRead(Rsn); lea(reg(1), Rs); call(&RSP::JR); return 1; @@ -435,6 +471,7 @@ auto RSP::Recompiler::emitSPECIAL(u32 instruction) -> bool { //JALR Rd,Rs case 0x09: { + pipeline.regRead(Rsn); lea(reg(1), Rd); lea(reg(2), Rs); call(&RSP::JALR); @@ -459,36 +496,42 @@ auto RSP::Recompiler::emitSPECIAL(u32 instruction) -> bool { //ADDU Rd,Rs,Rt case range2(0x20, 0x21): { + pipeline.regRead(Rsn).regRead(Rtn); add32(mem(Rd), mem(Rs), mem(Rt)); return 0; } //SUBU Rd,Rs,Rt case range2(0x22, 0x23): { + pipeline.regRead(Rsn).regRead(Rtn); sub32(mem(Rd), mem(Rs), mem(Rt)); return 0; } //AND Rd,Rs,Rt case 0x24: { + pipeline.regRead(Rsn).regRead(Rtn); and32(mem(Rd), mem(Rs), mem(Rt)); return 0; } //OR Rd,Rs,Rt case 0x25: { + pipeline.regRead(Rsn).regRead(Rtn); or32(mem(Rd), mem(Rs), mem(Rt)); return 0; } //XOR Rd,Rs,Rt case 0x26: { + pipeline.regRead(Rsn).regRead(Rtn); xor32(mem(Rd), mem(Rs), mem(Rt)); return 0; } //NOR Rd,Rs,Rt case 0x27: { + pipeline.regRead(Rsn).regRead(Rtn); or32(reg(0), mem(Rs), mem(Rt)); xor32(reg(0), reg(0), imm(-1)); mov32(mem(Rd), reg(0)); @@ -502,6 +545,7 @@ auto RSP::Recompiler::emitSPECIAL(u32 instruction) -> bool { //SLT Rd,Rs,Rt case 0x2a: { + pipeline.regRead(Rsn).regRead(Rtn); cmp32(mem(Rs), mem(Rt), set_slt); mov32_f(mem(Rd), flag_slt); return 0; @@ -509,6 +553,7 @@ auto 
RSP::Recompiler::emitSPECIAL(u32 instruction) -> bool { //SLTU Rd,Rs,Rt case 0x2b: { + pipeline.regRead(Rsn).regRead(Rtn); cmp32(mem(Rs), mem(Rt), set_ult); mov32_f(mem(Rd), flag_ult); return 0; @@ -529,6 +574,7 @@ auto RSP::Recompiler::emitREGIMM(u32 instruction) -> bool { //BLTZ Rs,i16 case 0x00: { + pipeline.regRead(Rsn); lea(reg(1), Rs); mov32(reg(2), imm(i16)); call(&RSP::BLTZ); @@ -537,6 +583,7 @@ auto RSP::Recompiler::emitREGIMM(u32 instruction) -> bool { //BGEZ Rs,i16 case 0x01: { + pipeline.regRead(Rsn); lea(reg(1), Rs); mov32(reg(2), imm(i16)); call(&RSP::BGEZ); @@ -550,6 +597,7 @@ auto RSP::Recompiler::emitREGIMM(u32 instruction) -> bool { //BLTZAL Rs,i16 case 0x10: { + pipeline.regRead(Rsn); lea(reg(1), Rs); mov32(reg(2), imm(i16)); call(&RSP::BLTZAL); @@ -558,6 +606,7 @@ auto RSP::Recompiler::emitREGIMM(u32 instruction) -> bool { //BGEZAL Rs,i16 case 0x11: { + pipeline.regRead(Rsn); lea(reg(1), Rs); mov32(reg(2), imm(i16)); call(&RSP::BGEZAL); @@ -579,6 +628,7 @@ auto RSP::Recompiler::emitSCC(u32 instruction) -> bool { //MFC0 Rt,Rd case 0x00: { + pipeline.regWrite(Rtn).load().store(); lea(reg(1), Rt); mov32(reg(2), imm(Rdn)); call(&RSP::MFC0); @@ -592,6 +642,7 @@ auto RSP::Recompiler::emitSCC(u32 instruction) -> bool { //MTC0 Rt,Rd case 0x04: { + pipeline.regRead(Rtn).load().store(); lea(reg(1), Rt); mov32(reg(2), imm(Rdn)); call(&RSP::MTC0); @@ -614,6 +665,7 @@ auto RSP::Recompiler::emitVU(u32 instruction) -> bool { //MFC2 Rt,Vs(e) case 0x00: { + pipeline.regWrite(Rtn).load().store(); lea(reg(1), Rt); lea(reg(2), Vs); callvu(&RSP::MFC2); @@ -627,6 +679,7 @@ auto RSP::Recompiler::emitVU(u32 instruction) -> bool { //CFC2 Rt,Rd case 0x02: { + pipeline.regWrite(Rtn).load().store(); lea(reg(1), Rt); mov32(reg(2), imm(Rdn)); call(&RSP::CFC2); @@ -640,6 +693,7 @@ auto RSP::Recompiler::emitVU(u32 instruction) -> bool { //MTC2 Rt,Vs(e) case 0x04: { + pipeline.regRead(Rtn).load().store(); lea(reg(1), Rt); lea(reg(2), Vs); callvu(&RSP::MTC2); @@ -653,6 +707,7 @@ 
auto RSP::Recompiler::emitVU(u32 instruction) -> bool { //CTC2 Rt,Rd case 0x06: { + pipeline.regRead(Rtn).load().store(); /* CTC2 sources Rt; the pipeline model also applies load() and store() to it */ lea(reg(1), Rt); mov32(reg(2), imm(Rdn)); call(&RSP::CTC2); @@ -1144,6 +1199,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LBV Vt(e),Rs,i7 case 0x00: { + pipeline.regRead(Rsn).load(); /* every LWC2 vector load reports a read of the base register Rs and sets the current load flag */ lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1153,6 +1209,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LSV Vt(e),Rs,i7 case 0x01: { + pipeline.regRead(Rsn).load(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1162,6 +1219,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LLV Vt(e),Rs,i7 case 0x02: { + pipeline.regRead(Rsn).load(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1171,6 +1229,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LDV Vt(e),Rs,i7 case 0x03: { + pipeline.regRead(Rsn).load(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1180,6 +1239,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LQV Vt(e),Rs,i7 case 0x04: { + pipeline.regRead(Rsn).load(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1189,6 +1249,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LRV Vt(e),Rs,i7 case 0x05: { + pipeline.regRead(Rsn).load(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1198,6 +1259,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LPV Vt(e),Rs,i7 case 0x06: { + pipeline.regRead(Rsn).load(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1207,6 +1269,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LUV Vt(e),Rs,i7 case 0x07: { + pipeline.regRead(Rsn).load(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1216,6 +1279,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LHV Vt(e),Rs,i7 case 0x08: { + pipeline.regRead(Rsn).load(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@
-1225,6 +1289,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LFV Vt(e),Rs,i7 case 0x09: { + pipeline.regRead(Rsn).load(); /* vector load: reads the base register Rs and sets the current load flag */ lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1239,6 +1304,7 @@ auto RSP::Recompiler::emitLWC2(u32 instruction) -> bool { //LTV Vt(e),Rs,i7 case 0x0b: { + pipeline.regRead(Rsn).load(); mov32(reg(1), imm(Vtn)); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1265,6 +1331,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SBV Vt(e),Rs,i7 case 0x00: { + pipeline.regRead(Rsn).store(); /* every SWC2 vector store reports a read of the base register Rs; store() then stalls while previous2.load is set */ lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1274,6 +1341,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SSV Vt(e),Rs,i7 case 0x01: { + pipeline.regRead(Rsn).store(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1283,6 +1351,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SLV Vt(e),Rs,i7 case 0x02: { + pipeline.regRead(Rsn).store(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1292,6 +1361,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SDV Vt(e),Rs,i7 case 0x03: { + pipeline.regRead(Rsn).store(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1301,6 +1371,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SQV Vt(e),Rs,i7 case 0x04: { + pipeline.regRead(Rsn).store(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1310,6 +1381,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SRV Vt(e),Rs,i7 case 0x05: { + pipeline.regRead(Rsn).store(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1319,6 +1391,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SPV Vt(e),Rs,i7 case 0x06: { + pipeline.regRead(Rsn).store(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1328,6 +1401,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SUV Vt(e),Rs,i7 case 0x07: { + pipeline.regRead(Rsn).store(); lea(reg(1), Vt); lea(reg(2), Rs);
mov32(reg(3), imm(i7)); @@ -1337,6 +1411,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SHV Vt(e),Rs,i7 case 0x08: { + pipeline.regRead(Rsn).store(); /* vector store: reads the base register Rs; store() stalls while previous2.load is set */ lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1346,6 +1421,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SFV Vt(e),Rs,i7 case 0x09: { + pipeline.regRead(Rsn).store(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1355,6 +1431,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //SWV Vt(e),Rs,i7 case 0x0a: { + pipeline.regRead(Rsn).store(); lea(reg(1), Vt); lea(reg(2), Rs); mov32(reg(3), imm(i7)); @@ -1364,6 +1441,7 @@ auto RSP::Recompiler::emitSWC2(u32 instruction) -> bool { //STV Vt(e),Rs,i7 case 0x0b: { + pipeline.regRead(Rsn).store(); mov32(reg(1), imm(Vtn)); lea(reg(2), Rs); mov32(reg(3), imm(i7)); diff --git a/ares/n64/rsp/rsp.cpp b/ares/n64/rsp/rsp.cpp index 425928fa44..ab9ccb2ed2 100644 --- a/ares/n64/rsp/rsp.cpp +++ b/ares/n64/rsp/rsp.cpp @@ -46,18 +46,23 @@ auto RSP::instruction() -> void { } if constexpr(Accuracy::RSP::Interpreter) { + pipeline.begin(); /* zero pipeline.clocks before this instruction is decoded */ pipeline.address = ipu.pc; pipeline.instruction = imem.read(pipeline.address); debugger.instruction(); decoderEXECUTE(); - instructionEpilogue(); - step(3); + pipeline.end(); /* shift current -> previous -> previous2 and add the base 3 clocks */ + instructionEpilogue(0); /* passes 0: instructionEpilogue() only steps by its argument under Accuracy::RSP::Recompiler */ } + + //this handles all stepping for the interpreter + //with the recompiler, it only steps for taken branch stalls + step(pipeline.clocks); } -auto RSP::instructionEpilogue() -> s32 { +auto RSP::instructionEpilogue(u32 clocks) -> s32 { if constexpr(Accuracy::RSP::Recompiler) { - step(3); + step(clocks); /* the caller-supplied clock count replaces the former fixed step(3) */ } ipu.r[0].u32 = 0; @@ -65,7 +70,7 @@ auto RSP::instructionEpilogue() -> s32 { switch(branch.state) { case Branch::Step: ipu.pc += 4; return status.halted; case Branch::Take: ipu.pc += 4; branch.delaySlot(); return status.halted; - case Branch::DelaySlot: ipu.pc = branch.pc; branch.reset(); return 1; + case Branch::DelaySlot: ipu.pc = branch.pc; branch.reset(); pipeline.stall(); /* redirecting the PC to branch.pc adds one stall() (3 clocks) to pipeline.clocks */
return 1; } unreachable; diff --git a/ares/n64/rsp/rsp.hpp b/ares/n64/rsp/rsp.hpp index 8cda7862a9..d8307af73c 100644 --- a/ares/n64/rsp/rsp.hpp +++ b/ares/n64/rsp/rsp.hpp @@ -33,13 +33,74 @@ struct RSP : Thread, Memory::RCP { auto step(u32 clocks) -> void; auto instruction() -> void; - auto instructionEpilogue() -> s32; + auto instructionEpilogue(u32 clocks) -> s32; auto power(bool reset) -> void; struct Pipeline { u32 address; u32 instruction; + u32 clocks; /* clock count accumulated since begin(); grown in steps of 3 by end() and stall() */ + + struct { + n1 load; + n5 lockReg; + } current, previous, previous2; /* current is filled by regWrite()/load(); end() ages it into previous and then previous2 */ + + auto hash() const -> u32 { /* packs the history into one value: previous.load at bit 0, previous.lockReg from bit 1, previous2.load at bit 6, previous2.lockReg from bit 7 */ + u32 value = 0; + value |= previous.load << 0; + value |= previous.lockReg << 1; + value |= previous2.load << 6; + value |= previous2.lockReg << 7; + return value; + } + + auto begin() -> void { /* restarts the clock count; the history slots are left untouched */ + clocks = 0; + } + + auto end() -> void { /* retires the current slot: ages the history by one, clears current, charges 3 clocks */ + previous2 = previous; + previous = current; + current = {}; + clocks += 3; + } + + auto stall() -> void { /* ages the history by one using an empty slot instead of current (current is kept) and charges 3 clocks */ + previous2 = previous; + previous = {}; + clocks += 3; + } + + auto regRead(u5 index) -> Pipeline& { /* stalls twice if index equals previous.lockReg, once if it equals previous2.lockReg, never for register 0; returns *this so calls can be chained */ + if(index == 0) { + //zero register can't be locked + } else if(index == previous.lockReg) { + stall(); + stall(); + } else if(index == previous2.lockReg) { + stall(); + } + return *this; + } + + auto regWrite(u5 index) -> Pipeline& { /* records index as the lock register of the current slot */ + current.lockReg = index; + return *this; + } + + auto load() -> Pipeline& { /* sets the load flag of the current slot */ + current.load = 1; + return *this; + } + + auto store() -> Pipeline& { /* stalls until previous2 no longer has its load flag set; each stall() clears previous, so this loops at most twice */ + while(previous2.load) { + stall(); + } + return *this; + } } pipeline; //dma.cpp @@ -333,11 +394,13 @@ struct RSP : Thread, Memory::RCP { struct Block { auto execute(RSP& self) -> void { + self.pipeline = pipeline; //must be updated first so instructionEpilog() can handle taken branch ((void (*)(RSP*, IPU*, VU*))code)(&self, &self.ipu, &self.vpu); } u8* code; u12 size; + Pipeline pipeline; //state at *end* of block excepting taken branch stall }; struct BlockHashPair { @@ -385,6 +448,7 @@ struct RSP : Thread, Memory::RCP { return s <= e ?
smask & emask : smask | emask; } + Pipeline pipeline; /* NOTE(review): presumably the emit-time pipeline state that the pipeline.regRead()/regWrite()/load()/store() calls in the emit* functions operate on — confirm */ bump_allocator allocator; array context; hashset blocks; diff --git a/ares/n64/rsp/serialization.cpp b/ares/n64/rsp/serialization.cpp index 3da6b71fc7..c6e59f5d85 100644 --- a/ares/n64/rsp/serialization.cpp +++ b/ares/n64/rsp/serialization.cpp @@ -5,6 +5,10 @@ auto RSP::serialize(serializer& s) -> void { s(pipeline.address); s(pipeline.instruction); + s(pipeline.previous.load); /* the load flag and lock register of previous and previous2 are serialized by this and the next three added calls */ + s(pipeline.previous.lockReg); + s(pipeline.previous2.load); + s(pipeline.previous2.lockReg); /* pipeline.current and pipeline.clocks are not serialized in this hunk */ s(dma.pending); s(dma.current); diff --git a/ares/n64/system/serialization.cpp b/ares/n64/system/serialization.cpp index b199fca5a2..3e0d65bd3a 100644 --- a/ares/n64/system/serialization.cpp +++ b/ares/n64/system/serialization.cpp @@ -1,4 +1,4 @@ -static const string SerializerVersion = "v131"; +static const string SerializerVersion = "v133"; /* NOTE(review): presumably bumped because new fields were added to the serialized state — confirm */ auto System::serialize(bool synchronize) -> serializer { serializer s;