Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CUDA][HIP][NFC] add CodeGenModule::shouldEmitCUDAGlobalVar #98543

Merged
merged 1 commit into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 24 additions & 20 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3702,6 +3702,19 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) {
return D->isImplicit();
}

bool CodeGenModule::shouldEmitCUDAGlobalVar(const VarDecl *Global) const {
  assert(LangOpts.CUDA && "Should not be called by non-CUDA languages");

  // Host-side compilation emits every global variable: the CUDA runtime
  // needs the size and host-side address of a 'shadow' for each global
  // device-side variable in order to provide access to its device-side
  // incarnation.
  if (!LangOpts.CUDAIsDevice)
    return true;

  // Device-side compilation: variables explicitly marked as device,
  // constant or shared are emitted.
  if (Global->hasAttr<CUDADeviceAttr>())
    return true;
  if (Global->hasAttr<CUDAConstantAttr>())
    return true;
  if (Global->hasAttr<CUDASharedAttr>())
    return true;

  // Otherwise only variables of the device builtin surface/texture types
  // are emitted on the device side.
  const auto GlobalTy = Global->getType();
  return GlobalTy->isCUDADeviceBuiltinSurfaceType() ||
         GlobalTy->isCUDADeviceBuiltinTextureType();
}

void CodeGenModule::EmitGlobal(GlobalDecl GD) {
const auto *Global = cast<ValueDecl>(GD.getDecl());

Expand All @@ -3726,36 +3739,27 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// Non-constexpr non-lambda implicit host device functions are not emitted
// unless they are used on device side.
if (LangOpts.CUDA) {
if (LangOpts.CUDAIsDevice) {
assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
"Expected Variable or Function");
if (const auto *VD = dyn_cast<VarDecl>(Global)) {
if (!shouldEmitCUDAGlobalVar(VD))
return;
} else if (LangOpts.CUDAIsDevice) {
const auto *FD = dyn_cast<FunctionDecl>(Global);
if ((!Global->hasAttr<CUDADeviceAttr>() ||
(LangOpts.OffloadImplicitHostDeviceTemplates && FD &&
(LangOpts.OffloadImplicitHostDeviceTemplates &&
hasImplicitAttr<CUDAHostAttr>(FD) &&
hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr() &&
!isLambdaCallOperator(FD) &&
!getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) &&
!Global->hasAttr<CUDAGlobalAttr>() &&
!Global->hasAttr<CUDAConstantAttr>() &&
!Global->hasAttr<CUDASharedAttr>() &&
!Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
!Global->getType()->isCUDADeviceBuiltinTextureType() &&
!(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) &&
!Global->hasAttr<CUDAHostAttr>()))
return;
} else {
// We need to emit host-side 'shadows' for all global
// device-side variables because the CUDA runtime needs their
// size and host-side address in order to provide access to
// their device-side incarnations.

// So device-only functions are the only things we skip.
if (isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>() &&
Global->hasAttr<CUDADeviceAttr>())
return;

assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
"Expected Variable or Function");
}
// Device-only functions are the only things we skip.
} else if (!Global->hasAttr<CUDAHostAttr>() &&
Global->hasAttr<CUDADeviceAttr>())
return;
}

if (LangOpts.OpenMP) {
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,9 @@ class CodeGenModule : public CodeGenTypeCache {

bool isTriviallyRecursive(const FunctionDecl *F);
bool shouldEmitFunction(GlobalDecl GD);
// Whether a global variable should be emitted, based on its CUDA/HIP
// host/device related attributes.
bool shouldEmitCUDAGlobalVar(const VarDecl *VD) const;
bool shouldOpportunisticallyEmitVTables();
/// Map used to be sure we don't emit the same CompoundLiteral twice.
llvm::DenseMap<const CompoundLiteralExpr *, llvm::GlobalVariable *>
Expand Down
Loading