Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OpenMP] Introduce the initial support for OpenMP kernel language #66844

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions clang/include/clang/AST/OpenMPClause.h
Original file line number Diff line number Diff line change
Expand Up @@ -9220,6 +9220,27 @@ class OMPXAttributeClause
}
};

/// This represents 'ompx_bare' clause in the '#pragma omp target teams ...'
/// directive.
///
/// \code
/// #pragma omp target teams ompx_bare
/// \endcode
/// In this example directive '#pragma omp target teams' has a 'ompx_bare'
/// clause.
class OMPXBareClause : public OMPNoChildClause<llvm::omp::OMPC_ompx_bare> {
public:
/// Build 'ompx_bare' clause.
///
/// \param StartLoc Starting location of the clause.
/// \param EndLoc Ending location of the clause.
OMPXBareClause(SourceLocation StartLoc, SourceLocation EndLoc)
: OMPNoChildClause(StartLoc, EndLoc) {}

/// Build an empty clause.
OMPXBareClause() = default;
};

} // namespace clang

#endif // LLVM_CLANG_AST_OPENMPCLAUSE_H
5 changes: 5 additions & 0 deletions clang/include/clang/AST/RecursiveASTVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -3890,6 +3890,11 @@ bool RecursiveASTVisitor<Derived>::VisitOMPXAttributeClause(
return true;
}

template <typename Derived>
bool RecursiveASTVisitor<Derived>::VisitOMPXBareClause(OMPXBareClause *C) {
return true;
}

// FIXME: look at the following tricky-seeming exprs to see if we
// need to recurse on anything. These are ones that have methods
// returning decls or qualtypes or nestednamespecifier -- though I'm
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticParseKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -1360,6 +1360,8 @@ def warn_clause_expected_string : Warning<
"expected string literal in 'clause %0' - ignoring">, InGroup<IgnoredPragmas>;
def err_omp_unexpected_clause : Error<
"unexpected OpenMP clause '%0' in directive '#pragma omp %1'">;
def err_omp_unexpected_clause_extension_only : Error<
"OpenMP clause '%0' is only available as extension, use '-fopenmp-extensions'">;
def err_omp_immediate_directive : Error<
"'#pragma omp %0' %select{|with '%2' clause }1cannot be an immediate substatement">;
def err_omp_expected_identifier_for_critical : Error<
Expand Down Expand Up @@ -1452,6 +1454,8 @@ def warn_unknown_declare_variant_isa_trait
"spelling or consider restricting the context selector with the "
"'arch' selector further">,
InGroup<SourceUsesOpenMP>;
def note_ompx_bare_clause : Note<
"OpenMP extension clause '%0' only allowed with '#pragma omp %1'">;
def note_omp_declare_variant_ctx_options
: Note<"context %select{set|selector|property}0 options are: %1">;
def warn_omp_declare_variant_expected
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -12447,6 +12447,10 @@ class Sema final {
SourceLocation LParenLoc,
SourceLocation EndLoc);

/// Called on a well-formed 'ompx_bare' clause.
OMPClause *ActOnOpenMPXBareClause(SourceLocation StartLoc,
SourceLocation EndLoc);

/// The kind of conversion being performed.
enum CheckedConversionKind {
/// An implicit conversion.
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/AST/OpenMPClause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) {
case OMPC_affinity:
case OMPC_when:
case OMPC_bind:
case OMPC_ompx_bare:
break;
default:
break;
Expand Down Expand Up @@ -2546,6 +2547,10 @@ void OMPClausePrinter::VisitOMPXAttributeClause(OMPXAttributeClause *Node) {
OS << ")";
}

void OMPClausePrinter::VisitOMPXBareClause(OMPXBareClause *Node) {
OS << "ompx_bare";
}

void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx,
VariantMatchInfo &VMI) const {
for (const OMPTraitSet &Set : Sets) {
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/StmtProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,7 @@ void OMPClauseProfiler::VisitOMPDoacrossClause(const OMPDoacrossClause *C) {
}
void OMPClauseProfiler::VisitOMPXAttributeClause(const OMPXAttributeClause *C) {
}
void OMPClauseProfiler::VisitOMPXBareClause(const OMPXBareClause *C) {}
} // namespace

void
Expand Down
59 changes: 42 additions & 17 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,9 @@ CGOpenMPRuntimeGPU::getExecutionMode() const {
return CurrentExecutionMode;
}

static CGOpenMPRuntimeGPU::DataSharingMode
getDataSharingMode(CodeGenModule &CGM) {
return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeGPU::CUDA
: CGOpenMPRuntimeGPU::Generic;
CGOpenMPRuntimeGPU::DataSharingMode
CGOpenMPRuntimeGPU::getDataSharingMode() const {
return CurrentDataSharingMode;
}

/// Check for inner (nested) SPMD construct, if any
Expand Down Expand Up @@ -752,6 +751,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
EntryFunctionState EST;
WrapperFunctionsMap.clear();

[[maybe_unused]] bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
assert(!IsBareKernel && "bare kernel should not be at generic mode");

// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
Expand All @@ -760,15 +762,13 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST)
: EST(EST) {}
void Enter(CodeGenFunction &CGF) override {
auto &RT =
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
RT.emitKernelInit(CGF, EST, /* IsSPMD */ false);
// Skip target region initialization.
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
auto &RT =
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
RT.clearLocThreadIdInsertPt(CGF);
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
}
Expand Down Expand Up @@ -807,25 +807,39 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
EntryFunctionState EST;

bool IsBareKernel = D.getSingleClause<OMPXBareClause>();

// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
CGOpenMPRuntimeGPU &RT;
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
bool IsBareKernel;
DataSharingMode Mode;

public:
NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,
CGOpenMPRuntimeGPU::EntryFunctionState &EST)
: RT(RT), EST(EST) {}
CGOpenMPRuntimeGPU::EntryFunctionState &EST,
bool IsBareKernel)
: RT(RT), EST(EST), IsBareKernel(IsBareKernel),
Mode(RT.CurrentDataSharingMode) {}
void Enter(CodeGenFunction &CGF) override {
if (IsBareKernel) {
RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
return;
}
RT.emitKernelInit(CGF, EST, /* IsSPMD */ true);
// Skip target region initialization.
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
if (IsBareKernel) {
RT.CurrentDataSharingMode = Mode;
return;
}
RT.clearLocThreadIdInsertPt(CGF);
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
}
} Action(*this, EST);
} Action(*this, EST, IsBareKernel);
CodeGen.setAction(Action);
IsInTTDRegion = true;
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
Expand All @@ -843,7 +857,8 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
assert(!ParentName.empty() && "Invalid target region parent name!");

bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
if (Mode)
bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
if (Mode || IsBareKernel)
emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
CodeGen);
else
Expand All @@ -867,6 +882,9 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
if (CGM.getLangOpts().NoGPULib || CGM.getLangOpts().OMPHostIRFile.empty())
return;

if (CGM.getLangOpts().OpenMPCUDAMode)
CurrentDataSharingMode = CGOpenMPRuntimeGPU::DS_CUDA;

OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,
"__omp_rtl_debug_kind");
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,
Expand Down Expand Up @@ -1030,7 +1048,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
SourceLocation Loc,
bool WithSPMDCheck) {
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
return;

Expand Down Expand Up @@ -1142,7 +1160,7 @@ void CGOpenMPRuntimeGPU::getKmpcFreeShared(

void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
bool WithSPMDCheck) {
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
return;

Expand Down Expand Up @@ -1178,11 +1196,18 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;

bool IsBareKernel = D.getSingleClause<OMPXBareClause>();

Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
// We don't emit any thread id function call in bare kernel, but because the
// outlined function has a pointer argument, we emit a nullptr here.
if (IsBareKernel)
OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
else
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
Expand Down Expand Up @@ -3273,7 +3298,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(

void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
const Decl *D) {
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return;

assert(D && "Expected function or captured|block decl.");
Expand Down Expand Up @@ -3382,7 +3407,7 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
VarTy, Align);
}

if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return Address::invalid();

VD = VD->getCanonicalDecl();
Expand Down
29 changes: 18 additions & 11 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
/// Unknown execution mode (orphaned directive).
EM_Unknown,
};

/// Target codegen is specialized based on two data-sharing modes: CUDA, in
/// which the local variables are actually global threadlocal, and Generic, in
/// which the local variables are placed in global memory if they may escape
/// their declaration context.
enum DataSharingMode {
/// CUDA data sharing mode.
DS_CUDA,
/// Generic data-sharing mode.
DS_Generic,
};

private:
/// Parallel outlined function work for workers to execute.
llvm::SmallVector<llvm::Function *, 16> Work;
Expand All @@ -42,6 +54,8 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {

ExecutionMode getExecutionMode() const;

DataSharingMode getDataSharingMode() const;

/// Get barrier to synchronize all threads in a block.
void syncCTAThreads(CodeGenFunction &CGF);

Expand Down Expand Up @@ -297,17 +311,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
Address getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) override;

/// Target codegen is specialized based on two data-sharing modes: CUDA, in
/// which the local variables are actually global threadlocal, and Generic, in
/// which the local variables are placed in global memory if they may escape
/// their declaration context.
enum DataSharingMode {
/// CUDA data sharing mode.
CUDA,
/// Generic data-sharing mode.
Generic,
};

/// Cleans up references to the objects in finished function.
///
void functionFinished(CodeGenFunction &CGF) override;
Expand Down Expand Up @@ -343,6 +346,10 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
/// to emit optimized code.
ExecutionMode CurrentExecutionMode = EM_Unknown;

/// Track the data sharing mode when codegening directives within a target
/// region.
DataSharingMode CurrentDataSharingMode = DataSharingMode::DS_Generic;

/// true if currently emitting code for target/teams/distribute region, false
/// - otherwise.
bool IsInTTDRegion = false;
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Parse/ParseOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3416,6 +3416,17 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
case OMPC_ompx_attribute:
Clause = ParseOpenMPOMPXAttributesClause(WrongDirective);
break;
case OMPC_ompx_bare:
if (WrongDirective)
Diag(Tok, diag::note_ompx_bare_clause)
<< getOpenMPClauseName(CKind) << "target teams";
if (!ErrorFound && !getLangOpts().OpenMPExtensions) {
Diag(Tok, diag::err_omp_unexpected_clause_extension_only)
<< getOpenMPClauseName(CKind) << getOpenMPDirectiveName(DKind);
ErrorFound = true;
}
Clause = ParseOpenMPClause(CKind, WrongDirective);
break;
default:
break;
}
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/Sema/SemaOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17553,6 +17553,9 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
case OMPC_partial:
Res = ActOnOpenMPPartialClause(nullptr, StartLoc, /*LParenLoc=*/{}, EndLoc);
break;
case OMPC_ompx_bare:
Res = ActOnOpenMPXBareClause(StartLoc, EndLoc);
break;
case OMPC_if:
case OMPC_final:
case OMPC_num_threads:
Expand Down Expand Up @@ -24279,3 +24282,8 @@ OMPClause *Sema::ActOnOpenMPXAttributeClause(ArrayRef<const Attr *> Attrs,
SourceLocation EndLoc) {
return new (Context) OMPXAttributeClause(Attrs, StartLoc, LParenLoc, EndLoc);
}

OMPClause *Sema::ActOnOpenMPXBareClause(SourceLocation StartLoc,
SourceLocation EndLoc) {
return new (Context) OMPXBareClause(StartLoc, EndLoc);
}
14 changes: 14 additions & 0 deletions clang/lib/Sema/TreeTransform.h
Original file line number Diff line number Diff line change
Expand Up @@ -2391,6 +2391,15 @@ class TreeTransform {
EndLoc);
}

/// Build a new OpenMP 'ompx_bare' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPXBareClause(SourceLocation StartLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPXBareClause(StartLoc, EndLoc);
}

/// Build a new OpenMP 'align' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
Expand Down Expand Up @@ -10804,6 +10813,11 @@ TreeTransform<Derived>::TransformOMPXAttributeClause(OMPXAttributeClause *C) {
NewAttrs, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}

template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPXBareClause(OMPXBareClause *C) {
return getDerived().RebuildOMPXBareClause(C->getBeginLoc(), C->getEndLoc());
}

//===----------------------------------------------------------------------===//
// Expression transformation
//===----------------------------------------------------------------------===//
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10446,6 +10446,9 @@ OMPClause *OMPClauseReader::readClause() {
case llvm::omp::OMPC_ompx_attribute:
C = new (Context) OMPXAttributeClause();
break;
case llvm::omp::OMPC_ompx_bare:
C = new (Context) OMPXBareClause();
break;
#define OMP_CLAUSE_NO_CLASS(Enum, Str) \
case llvm::omp::Enum: \
break;
Expand Down Expand Up @@ -11547,6 +11550,8 @@ void OMPClauseReader::VisitOMPXAttributeClause(OMPXAttributeClause *C) {
C->setLocEnd(Record.readSourceLocation());
}

void OMPClauseReader::VisitOMPXBareClause(OMPXBareClause *C) {}

OMPTraitInfo *ASTRecordReader::readOMPTraitInfo() {
OMPTraitInfo &TI = getContext().getNewOMPTraitInfo();
TI.Sets.resize(readUInt32());
Expand Down
Loading