Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allocation Profiler: Types for all allocations #50333

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1006,11 +1006,10 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
return jl_valueof(&v->header);
}

// External-facing version of jl_gc_big_alloc_inner, called into by LLVM-generated code.
// Delegates the allocation to jl_gc_big_alloc_inner and then hands the result
// to maybe_record_alloc_to_profile before returning it.
//
// ptls: forwarded unchanged to jl_gc_big_alloc_inner.
// sz:   forwarded to jl_gc_big_alloc_inner and reported to the profiler
//       (presumably a size in bytes -- TODO confirm against the inner allocator).
// Returns the value produced by jl_gc_big_alloc_inner.
JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
{
jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz);
// No type is passed to this function, so the allocation is recorded with
// jl_gc_unknown_type_tag.
maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag);
return val;
}

Expand Down Expand Up @@ -1316,12 +1315,18 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset
return jl_valueof(v);
}

// Record an allocation, called into by LLVM-generated code.
// Exported wrapper that forwards its arguments to maybe_record_alloc_to_profile.
//
// val:   the allocated value to record.
// osize: size reported for the allocation; note this is an `int`.
// type:  type to record for the allocation; received as jl_value_t* and cast
//        to jl_datatype_t* before being forwarded.
JL_DLLEXPORT void jl_maybe_record_alloc_to_profile(jl_value_t* val, int osize,
jl_value_t* type)
{
maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type);
}

// External-facing version of jl_gc_pool_alloc_inner, called into by LLVM-generated code.
// Delegates the allocation to jl_gc_pool_alloc_inner and then hands the result
// to maybe_record_alloc_to_profile before returning it.
//
// ptls:        forwarded unchanged to jl_gc_pool_alloc_inner.
// pool_offset: forwarded unchanged to jl_gc_pool_alloc_inner.
// osize:       forwarded to jl_gc_pool_alloc_inner and reported to the profiler.
// Returns the value produced by jl_gc_pool_alloc_inner.
JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
int osize)
{
jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
// No type is passed to this function, so the allocation is recorded with
// jl_gc_unknown_type_tag.
maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag);
return val;
}

Expand Down Expand Up @@ -3776,7 +3781,8 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
SetLastError(last_error);
#endif
errno = last_errno;
maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag);
// gc_managed_realloc_ is currently used exclusively for resizing array buffers.
maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag);
return b;
}

Expand Down
1 change: 1 addition & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@
XX(jl_gc_new_weakref_th) \
XX(jl_gc_num) \
XX(jl_gc_pool_alloc) \
XX(jl_maybe_record_alloc_to_profile) \
XX(jl_gc_queue_multiroot) \
XX(jl_gc_queue_root) \
XX(jl_gc_safepoint) \
Expand Down
8 changes: 5 additions & 3 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ struct FinalLowerGC: private JuliaPassContext {
Function *queueRootFunc;
Function *poolAllocFunc;
Function *bigAllocFunc;
Function *recordAllocFunc;
Function *allocTypedFunc;
Instruction *pgcstack;
Type *T_size;
Expand Down Expand Up @@ -253,10 +254,11 @@ bool FinalLowerGC::doInitialization(Module &M) {
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
recordAllocFunc = getOrDeclare(jl_well_known::GCRecordAllocToProfile);
allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
T_size = M.getDataLayout().getIntPtrType(M.getContext());

GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, recordAllocFunc, allocTypedFunc};
unsigned j = 0;
for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
if (!functionList[i])
Expand All @@ -272,8 +274,8 @@ bool FinalLowerGC::doInitialization(Module &M) {

bool FinalLowerGC::doFinalization(Module &M)
{
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, recordAllocFunc, allocTypedFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = recordAllocFunc = allocTypedFunc = nullptr;
auto used = M.getGlobalVariable("llvm.compiler.used");
if (!used)
return false;
Expand Down
18 changes: 18 additions & 0 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2404,6 +2404,24 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
store->setOrdering(AtomicOrdering::Unordered);
store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);

auto recordAllocIntrinsic = getOrDeclare(jl_well_known::GCRecordAllocToProfile);
auto value = newI;
//auto record_alloc =
builder.CreateCall(
recordAllocIntrinsic,
{
value,
builder.CreateIntCast(
CI->getArgOperand(1),
recordAllocIntrinsic->getFunctionType()->getParamType(1),
false),
builder.CreatePtrToInt(tag, T_size),
});
// TODO: is this needed? What is it?
//record_alloc->setOrdering(AtomicOrdering::Unordered);
//record_alloc->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);


// Replace uses of the call to `julia.gc_alloc_obj` with the call to
// `julia.gc_alloc_bytes`.
CI->replaceAllUsesWith(newI);
Expand Down
24 changes: 24 additions & 0 deletions src/llvm-pass-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ namespace jl_intrinsics {
namespace jl_well_known {
static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
static const char *GC_RECORD_ALLOC_TO_PROFILE_NAME = XSTR(jl_maybe_record_alloc_to_profile);
static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed);

Expand Down Expand Up @@ -275,6 +276,29 @@ namespace jl_well_known {
return addGCAllocAttributes(poolAllocFunc);
});

// Descriptor for the well-known runtime function named by
// GC_RECORD_ALLOC_TO_PROFILE_NAME. The lambda builds its LLVM declaration:
// `void (T_size, i32, T_size)`, external linkage, no extra function attributes.
const WellKnownFunctionDescription GCRecordAllocToProfile(
GC_RECORD_ALLOC_TO_PROFILE_NAME,
[](Type *T_size) {
auto &ctx = T_size->getContext();
auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
auto T_void = Type::getVoidTy(ctx);
auto recordAllocFunc = Function::Create(
FunctionType::get(
T_void,
{
// NOTE(review): the first parameter is declared as T_size; the
// T_prjlvalue alternative is left commented out, which also leaves the
// T_prjlvalue local above unused. Every call site must therefore pass a
// T_size value (not a pointer) as the first argument -- TODO confirm that
// the call sites agree with this declaration.
//T_prjlvalue,
T_size,
// Second parameter: 32-bit integer.
Type::getInt32Ty(ctx),
// Third parameter: T_size.
T_size,
},
// Not variadic.
false),
Function::ExternalLinkage,
GC_RECORD_ALLOC_TO_PROFILE_NAME);
// TODO(review): the disabled line below would attach LLVM's `allocsize`
// attribute. Presumably it was copied from an allocator declaration and is not
// applicable to a function returning void -- confirm and delete if so.
//recordAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None));
return recordAllocFunc;
});

const WellKnownFunctionDescription GCQueueRoot(
GC_QUEUE_ROOT_NAME,
[](Type *T_size) {
Expand Down
3 changes: 3 additions & 0 deletions src/llvm-pass-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ namespace jl_well_known {
// `jl_gc_pool_alloc`: allocates bytes.
extern const WellKnownFunctionDescription GCPoolAlloc;

// `jl_maybe_record_alloc_to_profile`: records an allocation to the alloc profile.
extern const WellKnownFunctionDescription GCRecordAllocToProfile;

// `jl_gc_queue_root`: queues a GC root.
extern const WellKnownFunctionDescription GCQueueRoot;

Expand Down
32 changes: 32 additions & 0 deletions stdlib/Profile/test/allocs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,35 @@ end
@test length(prof.allocs) >= 1
@test length([a for a in prof.allocs if a.type == String]) >= 1
end

# Profiles a call whose compiled code constructs a `MyType` and checks that at
# least one recorded allocation carries that type.
@testset "alloc profiler catches allocs from codegen" begin
    @eval begin
        struct MyType x::Int; y::Int end
        Base.:(+)(n::Number, x::MyType) = n + x.x + x.y
        foo(a, x) = a[1] + x
        wrapper(a) = foo(a, MyType(0,1))
    end
    input = Any[1,2,3]
    # Run once before profiling (warmup).
    wrapper(input)

    @eval Allocs.@profile sample_rate=1 wrapper($input)

    results = Allocs.fetch()
    Allocs.clear()

    @test length(results.allocs) >= 1
    @test count(alloc -> alloc.type == MyType, results.allocs) >= 1
end

# Profiles repeated `push!` calls on an initially empty vector and checks that
# at least one recorded allocation is tagged `Profile.Allocs.BufferType`.
@testset "alloc profiler catches allocs from buffer resize" begin
    vec = Int[]
    Allocs.@profile sample_rate=1 for _ in 1:100
        push!(vec, 1)
    end

    results = Allocs.fetch()
    Allocs.clear()

    @test length(results.allocs) >= 1
    @test count(alloc -> alloc.type == Profile.Allocs.BufferType, results.allocs) >= 1
end