JIT: Make isRemovableJmpCandidate less restrictive on AMD64 #95493

Merged · 12 commits · Dec 7, 2023
15 changes: 4 additions & 11 deletions src/coreclr/jit/codegenlinear.cpp
@@ -599,11 +599,11 @@ void CodeGen::genCodeForBBlist()
noway_assert(genStackLevel == 0);

#ifdef TARGET_AMD64
-bool emitNop = false;
+bool emitNopBeforeEHRegion = false;
// On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
// situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
// walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
// The document "X64 and ARM ABIs.docx" has more details. The situations:
// The document "clr-abi.md" has more details. The situations:
// 1. If the call instruction is in a different EH region as the instruction that follows it.
// 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
// be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
@@ -624,7 +624,7 @@ void CodeGen::genCodeForBBlist()
case BBJ_ALWAYS:
// We might skip generating the jump via a peephole optimization.
// If that happens, make sure a NOP is emitted as the last instruction in the block.
-emitNop = true;
+emitNopBeforeEHRegion = true;
break;

case BBJ_THROW:
@@ -737,7 +737,7 @@ void CodeGen::genCodeForBBlist()
if (block->CanRemoveJumpToNext(compiler))
{
#ifdef TARGET_AMD64
-if (emitNop)
+if (emitNopBeforeEHRegion)
{
instGen(INS_nop);
}
@@ -752,13 +752,6 @@ void CodeGen::genCodeForBBlist()
bool isRemovableJmpCandidate =
!block->hasAlign() && !compiler->fgInDifferentRegions(block, block->GetJumpDest());

-#ifdef TARGET_AMD64
-// AMD64 requires an instruction after a call instruction for unwinding
-// inside an EH region so if the last instruction generated was a call instruction
-// do not allow this jump to be marked for possible later removal.
-isRemovableJmpCandidate = isRemovableJmpCandidate && !GetEmitter()->emitIsLastInsCall();
-#endif // TARGET_AMD64

inst_JMP(EJ_jmp, block->GetJumpDest(), isRemovableJmpCandidate);
#else
inst_JMP(EJ_jmp, block->GetJumpDest());
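Taken together, the codegen hunks above drop the old AMD64 carve-out: a block-ending jump that follows a call is still a removal candidate, and the NOP decision moves into the emitter. A rough standalone sketch of the resulting policy (hypothetical names, not the JIT's actual code):

```cpp
// Sketch only: on AMD64, a call must not be the last instruction before an EH
// boundary, so removing a jump that followed a call leaves a 1-byte NOP behind
// (see clr-abi.md). Codegen marks every jump-to-next as a removal candidate;
// the emitter later picks one of these three outcomes.
enum class JmpLowering
{
    EmitJmp,   // jump kept and encoded as usual
    Removed,   // jump dropped entirely (0 bytes)
    NopInstead // jump dropped, 1-byte NOP emitted in its place (AMD64)
};

JmpLowering LowerBlockEndJmp(bool jumpsToNextGroup, bool followsCall)
{
    if (!jumpsToNextGroup)
    {
        return JmpLowering::EmitJmp;
    }
    return followsCall ? JmpLowering::NopInstead : JmpLowering::Removed;
}
```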
15 changes: 14 additions & 1 deletion src/coreclr/jit/emit.cpp
@@ -4630,9 +4630,11 @@ void emitter::emitRemoveJumpToNextInst()
insGroup* jmpGroup = jmp->idjIG;
instrDescJmp* nextJmp = jmp->idjNext;

-if (jmp->idInsFmt() == IF_LABEL && emitIsUncondJump(jmp) && jmp->idjIsRemovableJmpCandidate)
+if (jmp->idjIsRemovableJmpCandidate)
{
#if DEBUG
+assert(jmp->idInsFmt() == IF_LABEL);
+assert(emitIsUncondJump(jmp));
assert((jmpGroup->igFlags & IGF_HAS_ALIGN) == 0);
assert((jmpGroup->igNum > previousJumpIgNum) || (previousJumpIgNum == (UNATIVE_OFFSET)-1) ||
((jmpGroup->igNum == previousJumpIgNum) && (jmp->idDebugOnlyInfo()->idNum > previousJumpInsNum)));
@@ -4699,6 +4701,14 @@ void emitter::emitRemoveJumpToNextInst()
UNATIVE_OFFSET codeSize = jmp->idCodeSize();
jmp->idCodeSize(0);

+#ifdef TARGET_AMD64
+if (jmp->idjIsAfterCall)
+{
+jmp->idCodeSize(1);
+codeSize--;
+}
+#endif // TARGET_AMD64

jmpGroup->igSize -= (unsigned short)codeSize;
jmpGroup->igFlags |= IGF_UPD_ISZ;

@@ -4707,6 +4717,9 @@ void emitter::emitRemoveJumpToNextInst()
}
else
{
+// The jump was not removed; make sure idjIsRemovableJmpCandidate reflects this
+jmp->idjIsRemovableJmpCandidate = 0;

// Update the previousJmp
previousJmp = jmp;
#if DEBUG
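The size bookkeeping in the hunk above is the subtle part: a removed jmp normally gives all of its bytes back to its instruction group, but an after-call jmp keeps one byte for the NOP. A minimal sketch of the arithmetic (hypothetical helper, not JIT code):

```cpp
#include <cassert>

// Bytes freed in the instruction group when a removable jmp is dropped:
// everything, unless the jmp followed a call, in which case 1 byte stays
// behind for the NOP placeholder.
unsigned BytesFreedByRemoval(unsigned jmpCodeSize, bool isAfterCall)
{
    unsigned remaining = isAfterCall ? 1u : 0u; // 1-byte NOP on AMD64
    assert(jmpCodeSize >= remaining);
    return jmpCodeSize - remaining;
}

// e.g. a 2-byte short jmp after a call frees only 1 byte:
//   BytesFreedByRemoval(2, true) == 1
```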
14 changes: 10 additions & 4 deletions src/coreclr/jit/emit.h
@@ -1869,14 +1869,20 @@ class emitter
// beginning of the function -- of the target instruction of the jump, used to
// determine if this jump needs to be patched.
unsigned idjOffs :
-#if defined(TARGET_XARCH)
-29;
-// indicates that the jump was added at the end of a BBJ_ALWAYS basic block and is
+#if defined(TARGET_AMD64)
+28;
+// Indicates the jump was added at the end of a BBJ_ALWAYS basic block and is
Reviewer (Member) commented on lines +1872 to +1874:

> fwiw, all this bit fiddling is silly on 64-bit, where we're going to have 32 bits of padding after the unsigned fields, in which case we should just move all the bits to the end and make them bool :1 fields. Maybe not for 32-bit, though.
>
> But that shouldn't be done in this PR :)

Author (Member) replied:

> Good point, I'll come back to this in a follow-up PR.

// a candidate for being removed if it jumps to the next instruction
unsigned idjIsRemovableJmpCandidate : 1;
+// Indicates the jump succeeds a call instruction; if this jump is removed,
+// a nop will need to be emitted instead (see clr-abi.md for details)
+unsigned idjIsAfterCall : 1;
+#elif defined(TARGET_XARCH)
Reviewer (Member) commented:

> You split off amd64, so the only "xarch" left is x86

Suggested change:
-#elif defined(TARGET_XARCH)
+#elif defined(TARGET_X86)
+29;
+unsigned idjIsRemovableJmpCandidate : 1;
#else
30;
-#endif
+#endif // !defined(TARGET_XARCH)
Reviewer (Member) commented:

> IMO, this comment is not actually helpful.

unsigned idjShort : 1; // is the jump known to be a short one?
unsigned idjKeepLong : 1; // should the jump be kept long? (used for hot to cold and cold to hot jumps)
};
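For reference, a condensed sketch of the AMD64 bit layout this hunk produces (field names taken from the diff; the real instrDescJmp has additional members):

```cpp
// All five fields pack into a single 32-bit word on AMD64: idjOffs gives up
// one bit (29 -> 28) to make room for the new idjIsAfterCall flag.
struct JmpBitsSketchAmd64
{
    unsigned idjOffs                    : 28; // offset of the jump target, used for patching
    unsigned idjIsRemovableJmpCandidate : 1;  // may be removed if it jumps to the next instruction
    unsigned idjIsAfterCall             : 1;  // follows a call; removal must leave a NOP
    unsigned idjShort                   : 1;  // is the jump known to be a short one?
    unsigned idjKeepLong                : 1;  // keep long (hot-to-cold / cold-to-hot jumps)
};

static_assert(sizeof(JmpBitsSketchAmd64) == sizeof(unsigned), "bits fit in one word");
```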
58 changes: 48 additions & 10 deletions src/coreclr/jit/emitxarch.cpp
@@ -3493,11 +3493,28 @@ const emitJumpKind emitReverseJumpKinds[] = {
//
/* static */ bool emitter::emitJmpInstHasNoCode(instrDesc* id)
{
-bool result = (id->idIns() == INS_jmp) && (id->idCodeSize() == 0);
+bool result = (id->idIns() == INS_jmp) && ((instrDescJmp*)id)->idjIsRemovableJmpCandidate;

-// A zero size jump instruction can only be the one that is marked
-// as removable candidate.
-assert(!result || ((instrDescJmp*)id)->idjIsRemovableJmpCandidate);
+// For a jump that is considered for removal but not removed,
+// idjIsRemovableJmpCandidate should be set to false.
+// Else if the jump was removed, idjIsRemovableJmpCandidate should still be true.

+#ifdef TARGET_AMD64
+// On AMD64, if the removed jump instruction was after a call instruction,
+// a nop should have been inserted, hence a code size of 1.
+// (See clr-abi.md for details on why the nop is needed)
+if (result && (id->idCodeSize() != 0))
+{
+assert(id->idCodeSize() == 1);
+assert(((instrDescJmp*)id)->idjIsAfterCall);
+}
+else
+#endif // TARGET_AMD64
+{
+// A zero size jump instruction can only be the one that is marked
+// as removable candidate.
+assert(!result || (id->idCodeSize() == 0));
+}

return result;
}
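The reworked predicate encodes a three-state invariant; a standalone sketch of what the assertions above check (hypothetical names, not the JIT's code):

```cpp
#include <cassert>

// A removable-candidate jmp was either removed outright (0 bytes) or, on
// AMD64, kept exactly 1 byte for the NOP because it followed a call.
// Candidates that were examined but not removed have had the flag cleared.
bool JmpHasNoCodeSketch(bool isUncondJmp, bool isRemovableCandidate, unsigned codeSize, bool isAfterCall)
{
    bool result = isUncondJmp && isRemovableCandidate;
    if (result)
    {
        // codeSize == 1 is legal only for the after-call case on AMD64.
        assert((codeSize == 0) || ((codeSize == 1) && isAfterCall));
    }
    return result;
}
```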
@@ -9082,6 +9099,11 @@ void emitter::emitIns_J(instruction ins,
int instrCount /* = 0 */,
bool isRemovableJmpCandidate /* = false */)
{
+#ifdef TARGET_AMD64
+// Check emitter::emitLastIns before it is updated
+const bool lastInsIsCall = emitIsLastInsCall();
+#endif // TARGET_AMD64

UNATIVE_OFFSET sz;
instrDescJmp* id = emitNewInstrJmp();

@@ -9109,9 +9131,20 @@ void emitter::emitIns_J(instruction ins,
}
#endif // DEBUG

-emitContainsRemovableJmpCandidates |= isRemovableJmpCandidate;
-id->idjIsRemovableJmpCandidate = isRemovableJmpCandidate ? 1 : 0;
-id->idjShort = 0;
+if (isRemovableJmpCandidate)
+{
+emitContainsRemovableJmpCandidates = true;
+id->idjIsRemovableJmpCandidate = 1;
+#ifdef TARGET_AMD64
+id->idjIsAfterCall = lastInsIsCall ? 1 : 0;
+#endif // TARGET_AMD64
+}
+else
+{
+id->idjIsRemovableJmpCandidate = 0;
+}
+
+id->idjShort = 0;
if (dst != nullptr)
{
/* Assume the jump will be long */
@@ -16179,16 +16212,21 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
{
assert(id->idGCref() == GCT_NONE);
assert(id->idIsBound() || emitJmpInstHasNoCode(id));
+instrDescJmp* jmp = (instrDescJmp*)id;

// TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
-if (id->idCodeSize() != 0)
+if (!jmp->idjIsRemovableJmpCandidate)
{
dst = emitOutputLJ(ig, dst, id);
}
-else
+#ifdef TARGET_AMD64
+else if (id->idCodeSize() == 1)
{
-assert(((instrDescJmp*)id)->idjIsRemovableJmpCandidate);
+// Need to insert a nop if the removed jump was after a call
+assert(jmp->idjIsAfterCall);
+dst = emitOutputNOP(dst, 1);
}
+#endif // TARGET_AMD64
sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
break;
}
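Putting the pieces together, the label-jump output path above now branches three ways. A schematic sketch of that decision (hypothetical stand-ins for emitOutputLJ/emitOutputNOP, not the emitter's real encoding logic):

```cpp
#include <cstdint>

// Emit the jump, emit a 1-byte NOP, or emit nothing at all.
uint8_t* OutputLabelJmpSketch(uint8_t* dst, bool isRemovableCandidate, unsigned codeSize)
{
    if (!isRemovableCandidate)
    {
        *dst++ = 0xEB; // short jmp rel8 opcode, illustrative stand-in for emitOutputLJ
        *dst++ = 0x00; // displacement placeholder
    }
    else if (codeSize == 1)
    {
        *dst++ = 0x90; // single-byte x64 NOP: the removed jump followed a call (AMD64 only)
    }
    // else: the jump was removed entirely; nothing is emitted.
    return dst;
}
```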