Skip to content

Commit

Permalink
Add buffer for CUDA fatbinary in CGCUDANV backend
Browse files Browse the repository at this point in the history
- the buffer is needed to send the fatbinary code from the device JIT to the host JIT without file I/O
- the modification was needed because the backend uses the function llvm::MemoryBuffer::getFileOrSTDIN(), which does not support a virtual file system
- behavior: If the buffer is valid, use the buffer. Otherwise load fatbinary code from file.
  • Loading branch information
SimeonEhrig authored and Axel-Naumann committed Nov 5, 2019
1 parent 41df26b commit fe12679
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ class CodeGenOptions : public CodeGenOptionsBase {
/// object file.
std::vector<std::string> CudaGpuBinaryFileNames;

/// A buffer that contains the fatbinary code to forward to the CUDA runtime
/// back-end for incorporating it into the host-side object file.
std::shared_ptr<llvm::SmallVectorImpl<char>> CudaGpuBinaryBuffer;

/// The name of the file to which the backend should save YAML optimization
/// records.
std::string OptRecordFile;
Expand Down
45 changes: 28 additions & 17 deletions interpreter/llvm/src/tools/clang/lib/CodeGen/CGCUDANV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// No need to generate ctors/dtors if there are no GPU binaries.
if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
if (!CGM.getCodeGenOpts().CudaGpuBinaryBuffer &&
CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
return nullptr;

// void __cuda_register_globals(void* handle);
Expand All @@ -281,16 +282,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// to be cleaned up in destructor on exit. Then associate all known kernels
// with the GPU binary handle so CUDA runtime can figure out what to call on
// the GPU side.
for (const std::string &GpuBinaryFileName :
CGM.getCodeGenOpts().CudaGpuBinaryFileNames) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
if (std::error_code EC = GpuBinaryOrErr.getError()) {
CGM.getDiags().Report(diag::err_cannot_open_file) << GpuBinaryFileName
<< EC.message();
continue;
}

auto buildFatbinarySection = [&](const llvm::StringRef FatbinCode) {
const char *FatbinConstantName =
CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
// NVIDIA's cuobjdump looks for fatbins in this section.
Expand All @@ -305,14 +297,12 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// Fatbin version.
Values.addInt(IntTy, 1);
// Data.
Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(),
"", FatbinConstantName, 8));
Values.add(makeConstantString(FatbinCode, "", FatbinConstantName, 8));
// Unused in fatbin v1.
Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
llvm::GlobalVariable *FatbinWrapper =
Values.finishAndCreateGlobal("__cuda_fatbin_wrapper",
CGM.getPointerAlign(),
/*constant*/ true);
llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
"__cuda_fatbin_wrapper", CGM.getPointerAlign(),
/*constant*/ true);
FatbinWrapper->setSection(FatbinSectionName);

// GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
Expand All @@ -331,6 +321,27 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {

// Save GpuBinaryHandle so we can unregister it in destructor.
GpuBinaryHandles.push_back(GpuBinaryHandle);
};

// If there is a valid buffer with fatbinary code, embed the buffer.
// Otherwise, embed fatbinary code from files.
if (CGM.getCodeGenOpts().CudaGpuBinaryBuffer) {
const llvm::StringRef GpuBinaryBuffer(
CGM.getCodeGenOpts().CudaGpuBinaryBuffer->data(),
CGM.getCodeGenOpts().CudaGpuBinaryBuffer->size());
buildFatbinarySection(GpuBinaryBuffer);
} else {
for (const std::string &GpuBinaryFileName :
CGM.getCodeGenOpts().CudaGpuBinaryFileNames) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
if (std::error_code EC = GpuBinaryOrErr.getError()) {
CGM.getDiags().Report(diag::err_cannot_open_file)
<< GpuBinaryFileName << EC.message();
continue;
}
buildFatbinarySection(GpuBinaryOrErr.get()->getBuffer());
}
}

CtorBuilder.CreateRetVoid();
Expand Down

0 comments on commit fe12679

Please sign in to comment.