Skip to content

Commit

Permalink
[OpenMP] Codegen support for 'target parallel' on the host.
Browse files Browse the repository at this point in the history
This patch adds support for codegen of 'target parallel' on the host.
It is also the first combined directive that requires two or more
captured statements.  Support for this functionality is included in
the patch.

A combined directive such as 'target parallel' has two captured
statements, one for the 'target' and the other for the 'parallel'
region.  Two captured statements are required because each has
different implicit parameters (see SemaOpenMP.cpp).  For example,
the 'parallel' has 'global_tid' and 'bound_tid' while the 'target'
does not.  The patch adds support for handling multiple captured
statements based on the combined directive.

When codegen'ing the 'target parallel' directive, the 'target'
outlined function is created using the outer captured statement
and the 'parallel' outlined function is created using the inner
captured statement.

Reviewers: ABataev
Differential Revision: https://reviews.llvm.org/D28753

llvm-svn: 292374
arpith-jacob committed Jan 18, 2017
1 parent 771db6f commit 6801957
Showing 15 changed files with 1,612 additions and 42 deletions.
20 changes: 20 additions & 0 deletions clang/include/clang/AST/StmtOpenMP.h
Original file line number Diff line number Diff line change
@@ -198,6 +198,26 @@ class OMPExecutableDirective : public Stmt {
return const_cast<Stmt *>(*child_begin());
}

/// \brief Looks up the captured statement that belongs to one component
/// region of this (possibly combined) directive.
///
/// \param RegionKind Component region kind; it must be one of the capture
/// regions reported for this directive's kind.
CapturedStmt *getCapturedStmt(OpenMPDirectiveKind RegionKind) const {
  ArrayRef<OpenMPDirectiveKind> Regions =
      getOpenMPCaptureRegions(getDirectiveKind());
  assert(std::find(Regions.begin(), Regions.end(), RegionKind) !=
             Regions.end() &&
         "RegionKind not found in OpenMP CaptureRegions.");
  // The captured statements are nested in the same order as the capture
  // regions, starting at the associated statement: peel off one level per
  // region until the requested one is reached.
  auto *Captured = cast<CapturedStmt>(getAssociatedStmt());
  for (auto It = Regions.begin(), End = Regions.end(); It != End; ++It) {
    if (*It == RegionKind)
      return Captured;
    Captured = cast<CapturedStmt>(Captured->getCapturedStmt());
  }
  llvm_unreachable("Incorrect RegionKind specified for directive.");
}

OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

static bool classof(const Stmt *S) {
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/OpenMPKinds.h
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
#ifndef LLVM_CLANG_BASIC_OPENMPKINDS_H
#define LLVM_CLANG_BASIC_OPENMPKINDS_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"

namespace clang {
@@ -234,6 +235,10 @@ bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind);
/// directives that need loop bound sharing across loops outlined in nested
/// functions
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind);

/// Return the captured regions of an OpenMP directive.
llvm::ArrayRef<OpenMPDirectiveKind>
getOpenMPCaptureRegions(OpenMPDirectiveKind DKind);
}

#endif
3 changes: 3 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
@@ -8340,6 +8340,9 @@ class Sema {
return IsInOpenMPDeclareTargetContext;
}

/// Return the number of captured regions created for an OpenMP directive.
static int getOpenMPCaptureLevels(OpenMPDirectiveKind Kind);

/// \brief Initialization of captured region for OpenMP region.
void ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope);
/// \brief End of OpenMP region.
98 changes: 98 additions & 0 deletions clang/lib/Basic/OpenMPKinds.cpp
Original file line number Diff line number Diff line change
@@ -863,3 +863,101 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
Kind == OMPD_target_teams_distribute_parallel_for_simd ||
Kind == OMPD_target_teams_distribute_simd;
}

/// Return the capture regions of the OpenMP directive \p DKind, listed from
/// the outermost region to the innermost one.
///
/// The returned ArrayRef always refers to storage with static duration.  An
/// ArrayRef built from a braced list (e.g. `return {OMPD_parallel};`) points
/// into the backing array of a temporary std::initializer_list; that array is
/// destroyed when the return statement completes, so the caller would be
/// handed a dangling view.
///
/// \param DKind Directive to query.  Directives listed in the "not allowed"
/// group below, and OMPD_unknown, are rejected with llvm_unreachable.
ArrayRef<OpenMPDirectiveKind>
clang::getOpenMPCaptureRegions(OpenMPDirectiveKind DKind) {
  assert(DKind <= OMPD_unknown);

  // Identity table (Kinds[K] == K).  It gives every single-region answer a
  // stable address that a one-element ArrayRef can safely point at.
  struct SingleRegionTable {
    OpenMPDirectiveKind Kinds[OMPD_unknown + 1];
    SingleRegionTable() {
      for (unsigned I = 0; I <= OMPD_unknown; ++I)
        Kinds[I] = static_cast<OpenMPDirectiveKind>(I);
    }
  };
  static const SingleRegionTable Single;

  // 'target parallel' is the only directive with two capture regions.
  static const OpenMPDirectiveKind TargetParallelRegions[] = {OMPD_target,
                                                              OMPD_parallel};

  switch (DKind) {
  // Directives whose single capture region is 'parallel'.
  case OMPD_parallel:
  case OMPD_parallel_for:
  case OMPD_parallel_for_simd:
  case OMPD_parallel_sections:
    return ArrayRef<OpenMPDirectiveKind>(Single.Kinds[OMPD_parallel]);
  // Directives whose single capture region is the directive kind itself.
  case OMPD_teams:
  case OMPD_target_teams:
  case OMPD_simd:
  case OMPD_for:
  case OMPD_for_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskgroup:
  case OMPD_distribute:
  case OMPD_ordered:
  case OMPD_atomic:
  case OMPD_target_data:
  case OMPD_target:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
  case OMPD_task:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd:
  case OMPD_target_teams_distribute_simd:
    return ArrayRef<OpenMPDirectiveKind>(Single.Kinds[DKind]);
  // Combined directive: outer 'target' region, inner 'parallel' region.
  case OMPD_target_parallel:
    return ArrayRef<OpenMPDirectiveKind>(TargetParallelRegions);
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_cancellation_point:
  case OMPD_cancel:
  case OMPD_flush:
  case OMPD_target_enter_data:
  case OMPD_target_exit_data:
  case OMPD_declare_reduction:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_target_update:
    llvm_unreachable("OpenMP Directive is not allowed");
  case OMPD_unknown:
    llvm_unreachable("Unknown OpenMP directive");
  }
  // Only reachable for a value outside the enumerators handled above.
  return ArrayRef<OpenMPDirectiveKind>(Single.Kinds[OMPD_unknown]);
}
30 changes: 25 additions & 5 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
@@ -842,12 +842,12 @@ static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
assert(ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 *");
const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
CodeGenFunction CGF(CGM, true);
bool HasCancel = false;
if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
@@ -857,11 +857,27 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
HasCancel, getOutlinedHelperName());
HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Outline the captured statement of the 'parallel' component region of D,
  // as selected by getCapturedStmt(OMPD_parallel), through the shared
  // parallel/teams helper.
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, /*CS=*/D.getCapturedStmt(OMPD_parallel), ThreadIDVar,
      InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Outline the captured statement of the 'teams' component region of D
  // through the shared parallel/teams helper.
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, /*CS=*/D.getCapturedStmt(OMPD_teams), ThreadIDVar,
      InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar, const VarDecl *TaskTVar,
@@ -6124,6 +6140,10 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
CodeGenFunction::EmitOMPTargetDeviceFunction(
CGM, ParentName, cast<OMPTargetDirective>(*S));
break;
case Stmt::OMPTargetParallelDirectiveClass:
CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
break;
default:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
}
15 changes: 14 additions & 1 deletion clang/lib/CodeGen/CGOpenMPRuntime.h
Original file line number Diff line number Diff line change
@@ -527,6 +527,7 @@ class CGOpenMPRuntime {
/// Get combiner/initializer for the specified user-defined reduction, if any.
virtual std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl *D);

/// \brief Emits outlined function for the specified OpenMP parallel directive
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
@@ -535,7 +536,19 @@ class CGOpenMPRuntime {
/// \param InnermostKind Kind of innermost directive (for simple directives it
/// is a directive itself, for combined - its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
virtual llvm::Value *emitParallelOrTeamsOutlinedFunction(
virtual llvm::Value *emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);

/// \brief Emits the outlined function for the specified OpenMP teams
/// directive \a D. This outlined function has type
/// void(*)(kmp_int32 *ThreadID, kmp_int32 BoundID, struct context_vars*).
/// \param D OpenMP directive.
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
/// \param InnermostKind Kind of the innermost directive (for a simple
/// directive this is the directive itself; for a combined directive it is
/// its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
virtual llvm::Value *emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);

28 changes: 13 additions & 15 deletions clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
Original file line number Diff line number Diff line change
@@ -478,24 +478,22 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
const Expr *ThreadLimit,
SourceLocation Loc) {}

llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction(
llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Nothing NVPTX-specific is done here: defer to the generic runtime and
  // hand its result back unchanged.
  llvm::Value *OutlinedFn = CGOpenMPRuntime::emitParallelOutlinedFunction(
      D, ThreadIDVar, InnermostKind, CodeGen);
  return OutlinedFn;
}

llvm::Function *OutlinedFun = nullptr;
if (isa<OMPTeamsDirective>(D)) {
llvm::Value *OutlinedFunVal =
CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
} else {
llvm::Value *OutlinedFunVal =
CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
}
llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Build the outlined teams function with the generic runtime first.
  auto *TeamsFn =
      cast<llvm::Function>(CGOpenMPRuntime::emitTeamsOutlinedFunction(
          D, ThreadIDVar, InnermostKind, CodeGen));

  // Replace 'noinline' with 'alwaysinline' on the generated function.
  TeamsFn->removeFnAttr(llvm::Attribute::NoInline);
  TeamsFn->addFnAttr(llvm::Attribute::AlwaysInline);
  return TeamsFn;
}
25 changes: 20 additions & 5 deletions clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
Original file line number Diff line number Diff line change
@@ -138,7 +138,7 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
const Expr *ThreadLimit, SourceLocation Loc) override;

/// \brief Emits inlined function for the specified OpenMP parallel
// directive but an inlined function for teams.
// directive.
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
/// \param D OpenMP directive.
@@ -147,10 +147,25 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
/// is a directive itself, for combined - its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
llvm::Value *
emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) override;
emitParallelOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) override;

/// \brief Emits the outlined function for the specified OpenMP teams
/// directive \a D and marks it always-inline.
/// This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
/// \param D OpenMP directive.
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
/// \param InnermostKind Kind of the innermost directive (for a simple
/// directive this is the directive itself; for a combined directive it is
/// its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
llvm::Value *
emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) override;

/// \brief Emits code for teams call of the \a OutlinedFn with
/// variables captured in a record which address is stored in \a
46 changes: 37 additions & 9 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
@@ -1213,10 +1213,9 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
emitParallelOrTeamsOutlinedFunction(S,
*CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
@@ -3497,10 +3496,9 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
emitParallelOrTeamsOutlinedFunction(S,
*CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
@@ -3755,9 +3753,39 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective(
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

/// Emits the 'parallel' part of a 'target parallel' directive \a S into
/// \a CGF, after entering \a Action.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // The statement to emit is the one wrapped by the captured statement of
  // the 'parallel' component region.
  const CapturedStmt *ParallelCS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&BodyGen = [ParallelCS](CodeGenFunction &InnerCGF,
                                PrePostActionTy &) {
    // TODO: Add support for clauses.
    InnerCGF.EmitStmt(ParallelCS->getCapturedStmt());
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, BodyGen);
}

/// Emits the outlined function for the 'target parallel' directive \a S as a
/// standalone offload entry.
///
/// \param CGM Module the function is emitted into.
/// \param ParentName Forwarded unchanged to the OpenMP runtime's
/// emitTargetOutlinedFunction.
/// \param S The 'target parallel' directive to emit.
void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  // Start the out-parameters at null so the assert below tests a
  // well-defined value even if the runtime leaves one of them untouched.
  llvm::Function *Fn = nullptr;
  llvm::Constant *Addr = nullptr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

/// Emits code for the combined 'target parallel' directive \a S: the region
/// produced by emitTargetParallelRegion is passed to
/// emitCommonOMPTargetDirective as the code generation sequence.
void CodeGenFunction::EmitOMPTargetParallelDirective(
const OMPTargetParallelDirective &S) {
// Code generation sequence for the body: the 'parallel' region of S.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
emitTargetParallelRegion(CGF, S, Action);
};
emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
@@ -2708,6 +2708,9 @@ class CodeGenFunction : public CodeGenTypeCache {
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
StringRef ParentName,
const OMPTargetDirective &S);
/// \brief Emit the outlined function for the 'target parallel' directive
/// \a S as an offload entry.
///
/// \param CGM Module the function is emitted into.
/// \param ParentName Forwarded to the OpenMP runtime's
/// emitTargetOutlinedFunction; presumably the name of the enclosing
/// function -- TODO confirm.
/// \param S The 'target parallel' directive.
static void
EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
const OMPTargetParallelDirective &S);
/// \brief Emit inner loop of the worksharing/simd construct.
///
/// \param S Directive, for which the inner loop must be emitted.
Loading

0 comments on commit 6801957

Please sign in to comment.