diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 56e62fcbba34ed..f34bf6bb69ef12 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -599,11 +599,11 @@ void CodeGen::genCodeForBBlist() noway_assert(genStackLevel == 0); #ifdef TARGET_AMD64 - bool emitNop = false; + bool emitNopBeforeEHRegion = false; // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region. - // The document "X64 and ARM ABIs.docx" has more details. The situations: + // The document "clr-abi.md" has more details. The situations: // 1. If the call instruction is in a different EH region as the instruction that follows it. // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters @@ -624,7 +624,7 @@ void CodeGen::genCodeForBBlist() case BBJ_ALWAYS: // We might skip generating the jump via a peephole optimization. // If that happens, make sure a NOP is emitted as the last instruction in the block. - emitNop = true; + emitNopBeforeEHRegion = true; break; case BBJ_THROW: @@ -737,7 +737,7 @@ void CodeGen::genCodeForBBlist() if (block->CanRemoveJumpToNext(compiler)) { #ifdef TARGET_AMD64 - if (emitNop) + if (emitNopBeforeEHRegion) { instGen(INS_nop); } @@ -752,13 +752,6 @@ void CodeGen::genCodeForBBlist() bool isRemovableJmpCandidate = !block->hasAlign() && !compiler->fgInDifferentRegions(block, block->GetJumpDest()); -#ifdef TARGET_AMD64 - // AMD64 requires an instruction after a call instruction for unwinding - // inside an EH region so if the last instruction generated was a call instruction - // do not allow this jump to be marked for possible later removal. - isRemovableJmpCandidate = isRemovableJmpCandidate && !GetEmitter()->emitIsLastInsCall(); -#endif // TARGET_AMD64 - inst_JMP(EJ_jmp, block->GetJumpDest(), isRemovableJmpCandidate); #else inst_JMP(EJ_jmp, block->GetJumpDest()); diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 0ea898fd74c802..c329d05defcef6 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -4630,9 +4630,11 @@ void emitter::emitRemoveJumpToNextInst() insGroup* jmpGroup = jmp->idjIG; instrDescJmp* nextJmp = jmp->idjNext; - if (jmp->idInsFmt() == IF_LABEL && emitIsUncondJump(jmp) && jmp->idjIsRemovableJmpCandidate) + if (jmp->idjIsRemovableJmpCandidate) { #if DEBUG + assert(jmp->idInsFmt() == IF_LABEL); + assert(emitIsUncondJump(jmp)); assert((jmpGroup->igFlags & IGF_HAS_ALIGN) == 0); assert((jmpGroup->igNum > previousJumpIgNum) || (previousJumpIgNum == (UNATIVE_OFFSET)-1) || ((jmpGroup->igNum == previousJumpIgNum) && (jmp->idDebugOnlyInfo()->idNum > previousJumpInsNum))); @@ -4699,6 +4701,25 @@ void emitter::emitRemoveJumpToNextInst() UNATIVE_OFFSET codeSize = jmp->idCodeSize(); jmp->idCodeSize(0); +#ifdef TARGET_AMD64 + // If the removed jump is after a call and before an OS epilog, it needs to be replaced by a nop + if (jmp->idjIsAfterCallBeforeEpilog) + { + if ((targetGroup->igFlags & IGF_EPILOG) != 0) + { + // This jump will become a nop, so set its size now to ensure below calculations are correct + jmp->idCodeSize(1); + codeSize--; + } + else + { + // We don't need a nop if the removed jump isn't before an OS epilog, + // so zero jmp->idjIsAfterCallBeforeEpilog to avoid emitting a nop + jmp->idjIsAfterCallBeforeEpilog = 0; + } + } +#endif // TARGET_AMD64 + jmpGroup->igSize -= (unsigned short)codeSize; jmpGroup->igFlags |= IGF_UPD_ISZ; @@ -4707,6 +4728,9 @@ void emitter::emitRemoveJumpToNextInst() } else { + // The jump was not removed; make sure idjIsRemovableJmpCandidate reflects this + jmp->idjIsRemovableJmpCandidate = 0; + // Update the previousJmp previousJmp = jmp; #if DEBUG diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 3e960949bbc44a..2c27b80d07f239 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1869,11 +1869,17 @@ class emitter // beginning of the function -- of the target instruction of the jump, used to // determine if this jump needs to be patched. unsigned idjOffs : -#if defined(TARGET_XARCH) - 29; - // indicates that the jump was added at the end of a BBJ_ALWAYS basic block and is +#if defined(TARGET_AMD64) + 28; + // Indicates the jump was added at the end of a BBJ_ALWAYS basic block and is // a candidate for being removed if it jumps to the next instruction unsigned idjIsRemovableJmpCandidate : 1; + // Indicates the jump follows a call instruction and precedes an OS epilog. + // If this jump is removed, a nop will need to be emitted instead (see clr-abi.md for details). + unsigned idjIsAfterCallBeforeEpilog : 1; +#elif defined(TARGET_X86) + 29; + unsigned idjIsRemovableJmpCandidate : 1; #else 30; #endif diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index f7fce9e744c4b8..29a35ac4f97104 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -3493,11 +3493,16 @@ const emitJumpKind emitReverseJumpKinds[] = { // /* static */ bool emitter::emitJmpInstHasNoCode(instrDesc* id) { - bool result = (id->idIns() == INS_jmp) && (id->idCodeSize() == 0); + bool result = (id->idIns() == INS_jmp) && ((instrDescJmp*)id)->idjIsRemovableJmpCandidate; - // A zero size jump instruction can only be the one that is marked - // as removable candidate. - assert(!result || ((instrDescJmp*)id)->idjIsRemovableJmpCandidate); +// A jump marked for removal must have a code size of 0, +// except for jumps that must be replaced by nops on AMD64 (these must have a size of 1) +#ifdef TARGET_AMD64 + const bool isNopReplacement = ((instrDescJmp*)id)->idjIsAfterCallBeforeEpilog && (id->idCodeSize() == 1); + assert(!result || (id->idCodeSize() == 0) || isNopReplacement); +#else // !TARGET_AMD64 + assert(!result || (id->idCodeSize() == 0)); +#endif // !TARGET_AMD64 return result; } @@ -9082,6 +9087,11 @@ void emitter::emitIns_J(instruction ins, int instrCount /* = 0 */, bool isRemovableJmpCandidate /* = false */) { +#ifdef TARGET_AMD64 + // Check emitter::emitLastIns before it is updated + const bool lastInsIsCall = emitIsLastInsCall(); +#endif // TARGET_AMD64 + UNATIVE_OFFSET sz; instrDescJmp* id = emitNewInstrJmp(); @@ -9109,9 +9119,23 @@ void emitter::emitIns_J(instruction ins, } #endif // DEBUG - emitContainsRemovableJmpCandidates |= isRemovableJmpCandidate; - id->idjIsRemovableJmpCandidate = isRemovableJmpCandidate ? 1 : 0; - id->idjShort = 0; + if (isRemovableJmpCandidate) + { + emitContainsRemovableJmpCandidates = true; + id->idjIsRemovableJmpCandidate = 1; +#ifdef TARGET_AMD64 + // If this jump is after a call instruction, we might need to insert a nop after it's removed, + // but only if the jump is before an OS epilog. + // We'll check for the OS epilog in emitter::emitRemoveJumpToNextInst(). + id->idjIsAfterCallBeforeEpilog = lastInsIsCall ? 1 : 0; +#endif // TARGET_AMD64 + } + else + { + id->idjIsRemovableJmpCandidate = 0; + } + + id->idjShort = 0; if (dst != nullptr) { /* Assume the jump will be long */ @@ -16054,6 +16078,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) insFormat insFmt = id->idInsFmt(); unsigned char callInstrSize = 0; + // Indicates a jump between after a call and before an OS epilog was replaced by a nop on AMD64 + bool convertedJmpToNop = false; + #ifdef DEBUG bool dspOffs = emitComp->opts.dspGCtbls; #endif // DEBUG @@ -16174,22 +16201,50 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) } case IF_LABEL: - case IF_RWR_LABEL: - case IF_SWR_LABEL: { + instrDescJmp* jmp = (instrDescJmp*)id; assert(id->idGCref() == GCT_NONE); - assert(id->idIsBound() || emitJmpInstHasNoCode(id)); - // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()? - if (id->idCodeSize() != 0) + if (!jmp->idjIsRemovableJmpCandidate) { + assert(id->idIsBound()); dst = emitOutputLJ(ig, dst, id); } +#ifdef TARGET_AMD64 + else if (jmp->idjIsAfterCallBeforeEpilog) + { + // Need to insert a nop if the removed jump was after a call and before an OS epilog + // (The code size should already be set to 1 for the nop) + assert(id->idCodeSize() == 1); + dst = emitOutputNOP(dst, 1); + + // Set convertedJmpToNop in case we need to print this instrDesc as a nop in a disasm + convertedJmpToNop = true; + } +#endif // TARGET_AMD64 else { - assert(((instrDescJmp*)id)->idjIsRemovableJmpCandidate); + // Jump was removed, and no nop was needed, so id should not have any code + assert(jmp->idjIsRemovableJmpCandidate); + assert(emitJmpInstHasNoCode(id)); } - sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp)); + + sz = sizeof(instrDescJmp); + break; + } + case IF_RWR_LABEL: + case IF_SWR_LABEL: + { + assert(id->idGCref() == GCT_NONE); + assert(id->idIsBound() || emitJmpInstHasNoCode(id)); + instrDescJmp* jmp = (instrDescJmp*)id; + + // Jump removal optimization is only for IF_LABEL + assert(!jmp->idjIsRemovableJmpCandidate); + + // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()? + dst = emitOutputLJ(ig, dst, id); + sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp)); break; } @@ -17447,8 +17502,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) break; } - // Make sure we set the instruction descriptor size correctly +// Make sure we set the instruction descriptor size correctly +#ifdef TARGET_AMD64 + // If a jump is replaced by a nop, its instrDesc is temporarily modified so the nop + // is displayed correctly in disasms. Check for this discrepancy to avoid triggering this assert. + assert(((ins == INS_jmp) && (id->idIns() == INS_nop)) || (sz == emitSizeOfInsDsc(id))); +#else // !TARGET_AMD64 assert(sz == emitSizeOfInsDsc(id)); +#endif // !TARGET_AMD64 #if !FEATURE_FIXED_OUT_ARGS bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig); @@ -17505,14 +17566,47 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert(*dp != dst || emitInstHasNoCode(id)); #ifdef DEBUG - if ((emitComp->opts.disAsm || emitComp->verbose) && !emitJmpInstHasNoCode(id)) + if ((emitComp->opts.disAsm || emitComp->verbose) && (!emitJmpInstHasNoCode(id) || convertedJmpToNop)) { - emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp)); +#ifdef TARGET_AMD64 + // convertedJmpToNop indicates this instruction is a removable jump that was replaced by a nop. + // The instrDesc still describes a jump, so in order to print the nop in the disasm correctly, + // set the instruction and format accordingly (and reset them after to avoid triggering asserts). + if (convertedJmpToNop) + { + id->idIns(INS_nop); + id->idInsFmt(IF_NONE); + + emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp)); + + id->idIns(ins); + id->idInsFmt(insFmt); + } + else +#endif // TARGET_AMD64 + { + emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp)); + } } #else - if (emitComp->opts.disAsm && !emitJmpInstHasNoCode(id)) + if (emitComp->opts.disAsm && (!emitJmpInstHasNoCode(id) || convertedJmpToNop)) { - emitDispIns(id, false, 0, true, emitCurCodeOffs(*dp), *dp, (dst - *dp)); +#ifdef TARGET_AMD64 + if (convertedJmpToNop) + { + id->idIns(INS_nop); + id->idInsFmt(IF_NONE); + + emitDispIns(id, false, 0, true, emitCurCodeOffs(*dp), *dp, (dst - *dp)); + + id->idIns(ins); + id->idInsFmt(insFmt); + } + else +#endif // TARGET_AMD64 + { + emitDispIns(id, false, 0, true, emitCurCodeOffs(*dp), *dp, (dst - *dp)); + } } #endif