Skip to content

Commit

Permalink
Refactor final-gc-lowering (#50741)
Browse files Browse the repository at this point in the history
  • Loading branch information
pchintalapudi authored Aug 1, 2023
1 parent 8066c29 commit f00957b
Show file tree
Hide file tree
Showing 12 changed files with 84 additions and 195 deletions.
2 changes: 1 addition & 1 deletion src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,6 @@
YY(LLVMExtraMPMAddCPUFeaturesPass) \
YY(LLVMExtraMPMAddRemoveNIPass) \
YY(LLVMExtraMPMAddLowerSIMDLoopPass) \
YY(LLVMExtraMPMAddFinalLowerGCPass) \
YY(LLVMExtraMPMAddMultiVersioningPass) \
YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \
YY(LLVMExtraMPMAddRemoveAddrspacesPass) \
Expand All @@ -595,6 +594,7 @@
YY(LLVMExtraFPMAddPropagateJuliaAddrspacesPass) \
YY(LLVMExtraFPMAddLowerExcHandlersPass) \
YY(LLVMExtraFPMAddGCInvariantVerifierPass) \
YY(LLVMExtraFPMAddFinalLowerGCPass) \
YY(LLVMExtraLPMAddJuliaLICMPass) \
YY(JLJITGetLLVMOrcExecutionSession) \
YY(JLJITGetJuliaOJIT) \
Expand Down
229 changes: 58 additions & 171 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ using namespace llvm;

struct FinalLowerGC: private JuliaPassContext {
bool runOnFunction(Function &F);
bool doInitialization(Module &M);
bool doFinalization(Module &M);

private:
Function *queueRootFunc;
Expand All @@ -53,7 +51,7 @@ struct FinalLowerGC: private JuliaPassContext {
Type *T_size;

// Lowers a `julia.new_gc_frame` intrinsic.
Value *lowerNewGCFrame(CallInst *target, Function &F);
void lowerNewGCFrame(CallInst *target, Function &F);

// Lowers a `julia.push_gc_frame` intrinsic.
void lowerPushGCFrame(CallInst *target, Function &F);
Expand All @@ -62,26 +60,26 @@ struct FinalLowerGC: private JuliaPassContext {
void lowerPopGCFrame(CallInst *target, Function &F);

// Lowers a `julia.get_gc_frame_slot` intrinsic.
Value *lowerGetGCFrameSlot(CallInst *target, Function &F);
void lowerGetGCFrameSlot(CallInst *target, Function &F);

// Lowers a `julia.gc_alloc_bytes` intrinsic.
Value *lowerGCAllocBytes(CallInst *target, Function &F);
void lowerGCAllocBytes(CallInst *target, Function &F);

// Lowers a `julia.queue_gc_root` intrinsic.
Value *lowerQueueGCRoot(CallInst *target, Function &F);
void lowerQueueGCRoot(CallInst *target, Function &F);

// Lowers a `julia.safepoint` intrinsic.
Value *lowerSafepoint(CallInst *target, Function &F);
void lowerSafepoint(CallInst *target, Function &F);
};

Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
{
++NewGCFrameCount;
assert(target->arg_size() == 1);
unsigned nRoots = cast<ConstantInt>(target->getArgOperand(0))->getLimitedValue(INT_MAX);

// Create the GC frame.
IRBuilder<> builder(target->getNextNode());
IRBuilder<> builder(target);
auto gcframe_alloca = builder.CreateAlloca(T_prjlvalue, ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2));
gcframe_alloca->setAlignment(Align(16));
// addrspacecast as needed for non-0 alloca addrspace
Expand All @@ -92,7 +90,8 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
auto ptrsize = F.getParent()->getDataLayout().getPointerSize();
builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe);

return gcframe;
target->replaceAllUsesWith(gcframe);
target->eraseFromParent();
}

void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
Expand All @@ -102,8 +101,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
auto gcframe = target->getArgOperand(0);
unsigned nRoots = cast<ConstantInt>(target->getArgOperand(1))->getLimitedValue(INT_MAX);

IRBuilder<> builder(target->getContext());
builder.SetInsertPoint(&*(++BasicBlock::iterator(target)));
IRBuilder<> builder(target);
StoreInst *inst = builder.CreateAlignedStore(
ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)),
builder.CreateBitCast(
Expand All @@ -123,6 +121,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
gcframe,
builder.CreateBitCast(pgcstack, PointerType::get(PointerType::get(T_prjlvalue, 0), 0)),
Align(sizeof(void*)));
target->eraseFromParent();
}

void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
Expand All @@ -131,8 +130,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
assert(target->arg_size() == 1);
auto gcframe = target->getArgOperand(0);

IRBuilder<> builder(target->getContext());
builder.SetInsertPoint(target);
IRBuilder<> builder(target);
Instruction *gcpop =
cast<Instruction>(builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1));
Instruction *inst = builder.CreateAlignedLoad(T_prjlvalue, gcpop, Align(sizeof(void*)), "frame.prev");
Expand All @@ -143,55 +141,53 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
PointerType::get(T_prjlvalue, 0)),
Align(sizeof(void*)));
inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
target->eraseFromParent();
}

Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
{
++GetGCFrameSlotCount;
assert(target->arg_size() == 2);
auto gcframe = target->getArgOperand(0);
auto index = target->getArgOperand(1);

// Initialize an IR builder.
IRBuilder<> builder(target->getContext());
builder.SetInsertPoint(target);
IRBuilder<> builder(target);

// The first two slots are reserved, so we'll add two to the index.
index = builder.CreateAdd(index, ConstantInt::get(Type::getInt32Ty(F.getContext()), 2));

// Lower the intrinsic as a GEP.
auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index);
gep->takeName(target);
return gep;
target->replaceAllUsesWith(gep);
target->eraseFromParent();
}

Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
void FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
{
++QueueGCRootCount;
assert(target->arg_size() == 1);
target->setCalledFunction(queueRootFunc);
return target;
}

Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
{
++SafepointCount;
assert(target->arg_size() == 1);
IRBuilder<> builder(target->getContext());
builder.SetInsertPoint(target);
IRBuilder<> builder(target);
Value* signal_page = target->getOperand(0);
Value* load = builder.CreateLoad(T_size, signal_page, true);
return load;
builder.CreateLoad(T_size, signal_page, true);
target->eraseFromParent();
}

Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
{
++GCAllocBytesCount;
assert(target->arg_size() == 3);
CallInst *newI;

IRBuilder<> builder(target);
builder.SetCurrentDebugLocation(target->getDebugLoc());
auto ptls = target->getArgOperand(0);
auto type = target->getArgOperand(2);
Attribute derefAttr;
Expand Down Expand Up @@ -222,86 +218,13 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
newI->setAttributes(newI->getCalledFunction()->getAttributes());
newI->addRetAttr(derefAttr);
newI->takeName(target);
return newI;
}

bool FinalLowerGC::doInitialization(Module &M) {
// Initialize platform-agnostic references.
initAll(M);

// Initialize platform-specific references.
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
T_size = M.getDataLayout().getIntPtrType(M.getContext());

GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
unsigned j = 0;
for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
if (!functionList[i])
continue;
if (i != j)
functionList[j] = functionList[i];
j++;
}
if (j != 0)
appendToCompilerUsed(M, ArrayRef<GlobalValue*>(functionList, j));
return true;
}

bool FinalLowerGC::doFinalization(Module &M)
{
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
auto used = M.getGlobalVariable("llvm.compiler.used");
if (!used)
return false;
SmallPtrSet<Constant*, 16> InitAsSet(
functionList,
functionList + sizeof(functionList) / sizeof(void*));
bool changed = false;
SmallVector<Constant*, 16> init;
ConstantArray *CA = cast<ConstantArray>(used->getInitializer());
for (auto &Op : CA->operands()) {
Constant *C = cast_or_null<Constant>(Op);
if (InitAsSet.count(C->stripPointerCasts())) {
changed = true;
continue;
}
init.push_back(C);
}
if (!changed)
return false;
used->eraseFromParent();
if (init.empty())
return true;
ArrayType *ATy = ArrayType::get(Type::getInt8PtrTy(M.getContext()), init.size());
used = new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
ConstantArray::get(ATy, init), "llvm.compiler.used");
used->setSection("llvm.metadata");
return true;
}

template<typename TIterator>
static void replaceInstruction(
Instruction *oldInstruction,
Value *newInstruction,
TIterator &it)
{
if (newInstruction != oldInstruction) {
oldInstruction->replaceAllUsesWith(newInstruction);
it = oldInstruction->eraseFromParent();
}
else {
++it;
}
target->replaceAllUsesWith(newI);
target->eraseFromParent();
}

bool FinalLowerGC::runOnFunction(Function &F)
{
// Check availability of functions again since they might have been deleted.
initFunctions(*F.getParent());
initAll(*F.getParent());
if (!pgcstack_getter && !adoptthread_func) {
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n");
return false;
Expand All @@ -314,55 +237,39 @@ bool FinalLowerGC::runOnFunction(Function &F)
return false;
}
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");

// Acquire intrinsic functions.
auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame);
auto pushGCFrameFunc = getOrNull(jl_intrinsics::pushGCFrame);
auto popGCFrameFunc = getOrNull(jl_intrinsics::popGCFrame);
auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot);
auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes);
auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot);
auto safepointFunc = getOrNull(jl_intrinsics::safepoint);
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());

// Lower all calls to supported intrinsics.
for (BasicBlock &BB : F) {
for (auto it = BB.begin(); it != BB.end();) {
auto *CI = dyn_cast<CallInst>(&*it);
if (!CI) {
++it;
for (auto &BB : F) {
for (auto &I : make_early_inc_range(BB)) {
auto *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;
}

Value *callee = CI->getCalledOperand();
assert(callee);

if (callee == newGCFrameFunc) {
replaceInstruction(CI, lowerNewGCFrame(CI, F), it);
}
else if (callee == pushGCFrameFunc) {
lowerPushGCFrame(CI, F);
it = CI->eraseFromParent();
}
else if (callee == popGCFrameFunc) {
lowerPopGCFrame(CI, F);
it = CI->eraseFromParent();
}
else if (callee == getGCFrameSlotFunc) {
replaceInstruction(CI, lowerGetGCFrameSlot(CI, F), it);
}
else if (callee == GCAllocBytesFunc) {
replaceInstruction(CI, lowerGCAllocBytes(CI, F), it);
}
else if (callee == queueGCRootFunc) {
replaceInstruction(CI, lowerQueueGCRoot(CI, F), it);
}
else if (callee == safepointFunc) {
lowerSafepoint(CI, F);
it = CI->eraseFromParent();
}
else {
++it;
}
#define LOWER_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \
do { \
auto intrinsic = getOrNull(jl_intrinsics::INTRINSIC); \
if (intrinsic == callee) { \
LOWER_INTRINSIC_FUNC(CI, F); \
} \
} while (0)

LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame);
LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame);
LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame);
LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot);
LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes);
LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
LOWER_INTRINSIC(safepoint, lowerSafepoint);

#undef LOWER_INTRINSIC
}
}

Expand All @@ -380,44 +287,24 @@ struct FinalLowerGCLegacy: public FunctionPass {

private:
bool runOnFunction(Function &F) override;
bool doInitialization(Module &M) override;
bool doFinalization(Module &M) override;

FinalLowerGC finalLowerGC;
};

bool FinalLowerGCLegacy::runOnFunction(Function &F) {
return finalLowerGC.runOnFunction(F);
}

bool FinalLowerGCLegacy::doInitialization(Module &M) {
return finalLowerGC.doInitialization(M);
}

bool FinalLowerGCLegacy::doFinalization(Module &M) {
auto ret = finalLowerGC.doFinalization(M);
auto modified = finalLowerGC.runOnFunction(F);
#ifdef JL_VERIFY_PASSES
assert(!verifyLLVMIR(M));
assert(!verifyLLVMIR(F));
#endif
return ret;
return modified;
}


PreservedAnalyses FinalLowerGCPass::run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)
{
auto finalLowerGC = FinalLowerGC();
bool modified = false;
modified |= finalLowerGC.doInitialization(M);
for (auto &F : M.functions()) {
if (F.isDeclaration())
continue;
modified |= finalLowerGC.runOnFunction(F);
}
modified |= finalLowerGC.doFinalization(M);
if (FinalLowerGC().runOnFunction(F)) {
#ifdef JL_VERIFY_PASSES
assert(!verifyLLVMIR(M));
assert(!verifyLLVMIR(F));
#endif
if (modified) {
return PreservedAnalyses::allInSet<CFGAnalyses>();
}
return PreservedAnalyses::all();
Expand Down
Loading

0 comments on commit f00957b

Please sign in to comment.