Skip to content

Commit b68e2db

Browse files
author
Samuel Antao
committedMar 3, 2016
[OpenMP] Code generation for teams - kernel launching
Summary: This patch implements the launching of a target region in the presence of a nested teams region, i.e calls tgt_target_teams with the required arguments gathered from the enclosed teams directive. The actual codegen of the region enclosed by the teams construct will be contributed in a separate patch. Reviewers: hfinkel, arpith-jacob, kkwli0, carlo.bertolli, ABataev Subscribers: cfe-commits, caomhin, fraggamuffin Differential Revision: http://reviews.llvm.org/D17019 llvm-svn: 262625
1 parent b840a6d commit b68e2db

File tree

3 files changed

+427
-8
lines changed

3 files changed

+427
-8
lines changed
 

‎clang/lib/CodeGen/CGOpenMPRuntime.cpp

+211-6
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,70 @@ class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
252252
StringRef HelperName;
253253
};
254254

255+
static void EmptyCodeGen(CodeGenFunction &) {
256+
llvm_unreachable("No codegen for expressions");
257+
}
258+
/// \brief API for generation of expressions captured in a innermost OpenMP
259+
/// region.
260+
class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo {
261+
public:
262+
CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
263+
: CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
264+
OMPD_unknown,
265+
/*HasCancel=*/false),
266+
PrivScope(CGF) {
267+
// Make sure the globals captured in the provided statement are local by
268+
// using the privatization logic. We assume the same variable is not
269+
// captured more than once.
270+
for (auto &C : CS.captures()) {
271+
if (!C.capturesVariable() && !C.capturesVariableByCopy())
272+
continue;
273+
274+
const VarDecl *VD = C.getCapturedVar();
275+
if (VD->isLocalVarDeclOrParm())
276+
continue;
277+
278+
DeclRefExpr DRE(const_cast<VarDecl *>(VD),
279+
/*RefersToEnclosingVariableOrCapture=*/false,
280+
VD->getType().getNonReferenceType(), VK_LValue,
281+
SourceLocation());
282+
PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
283+
return CGF.EmitLValue(&DRE).getAddress();
284+
});
285+
}
286+
(void)PrivScope.Privatize();
287+
}
288+
289+
/// \brief Lookup the captured field decl for a variable.
290+
const FieldDecl *lookup(const VarDecl *VD) const override {
291+
if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
292+
return FD;
293+
return nullptr;
294+
}
295+
296+
/// \brief Emit the captured statement body.
297+
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
298+
llvm_unreachable("No body for expressions");
299+
}
300+
301+
/// \brief Get a variable or parameter for storing global thread id
302+
/// inside OpenMP construct.
303+
const VarDecl *getThreadIDVariable() const override {
304+
llvm_unreachable("No thread id for expressions");
305+
}
306+
307+
/// \brief Get the name of the capture helper.
308+
StringRef getHelperName() const override {
309+
llvm_unreachable("No helper name for expressions");
310+
}
311+
312+
static bool classof(const CGCapturedStmtInfo *Info) { return false; }
313+
314+
private:
315+
/// Private scope to capture global variables.
316+
CodeGenFunction::OMPPrivateScope PrivScope;
317+
};
318+
255319
/// \brief RAII for emitting code of OpenMP constructs.
256320
class InlinedOpenMPRegionRAII {
257321
CodeGenFunction &CGF;
@@ -481,6 +545,10 @@ enum OpenMPRTLFunction {
481545
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
482546
// *arg_types);
483547
OMPRTL__tgt_target,
548+
// Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
549+
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
550+
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
551+
OMPRTL__tgt_target_teams,
484552
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
485553
OMPRTL__tgt_register_lib,
486554
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
@@ -1153,6 +1221,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
11531221
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
11541222
break;
11551223
}
1224+
case OMPRTL__tgt_target_teams: {
1225+
// Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1226+
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1227+
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1228+
llvm::Type *TypeParams[] = {CGM.Int32Ty,
1229+
CGM.VoidPtrTy,
1230+
CGM.Int32Ty,
1231+
CGM.VoidPtrPtrTy,
1232+
CGM.VoidPtrPtrTy,
1233+
CGM.SizeTy->getPointerTo(),
1234+
CGM.Int32Ty->getPointerTo(),
1235+
CGM.Int32Ty,
1236+
CGM.Int32Ty};
1237+
llvm::FunctionType *FnTy =
1238+
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1239+
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
1240+
break;
1241+
}
11561242
case OMPRTL__tgt_register_lib: {
11571243
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
11581244
QualType ParamTy =
@@ -3972,6 +4058,102 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction(
39724058
DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
39734059
}
39744060

4061+
/// \brief Emit the num_teams clause of an enclosed teams directive at the
4062+
/// target region scope. If there is no teams directive associated with the
4063+
/// target directive, or if there is no num_teams clause associated with the
4064+
/// enclosed teams directive, return nullptr.
4065+
static llvm::Value *
4066+
emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4067+
CodeGenFunction &CGF,
4068+
const OMPExecutableDirective &D) {
4069+
4070+
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4071+
"teams directive expected to be "
4072+
"emitted only for the host!");
4073+
4074+
// FIXME: For the moment we do not support combined directives with target and
4075+
// teams, so we do not expect to get any num_teams clause in the provided
4076+
// directive. Once we support that, this assertion can be replaced by the
4077+
// actual emission of the clause expression.
4078+
assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
4079+
"Not expecting clause in directive.");
4080+
4081+
// If the current target region has a teams region enclosed, we need to get
4082+
// the number of teams to pass to the runtime function call. This is done
4083+
// by generating the expression in a inlined region. This is required because
4084+
// the expression is captured in the enclosing target environment when the
4085+
// teams directive is not combined with target.
4086+
4087+
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4088+
4089+
// FIXME: Accommodate other combined directives with teams when they become
4090+
// available.
4091+
if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
4092+
if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
4093+
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4094+
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4095+
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
4096+
return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
4097+
/*IsSigned=*/true);
4098+
}
4099+
4100+
// If we have an enclosed teams directive but no num_teams clause we use
4101+
// the default value 0.
4102+
return CGF.Builder.getInt32(0);
4103+
}
4104+
4105+
// No teams associated with the directive.
4106+
return nullptr;
4107+
}
4108+
4109+
/// \brief Emit the thread_limit clause of an enclosed teams directive at the
4110+
/// target region scope. If there is no teams directive associated with the
4111+
/// target directive, or if there is no thread_limit clause associated with the
4112+
/// enclosed teams directive, return nullptr.
4113+
static llvm::Value *
4114+
emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4115+
CodeGenFunction &CGF,
4116+
const OMPExecutableDirective &D) {
4117+
4118+
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
4119+
"teams directive expected to be "
4120+
"emitted only for the host!");
4121+
4122+
// FIXME: For the moment we do not support combined directives with target and
4123+
// teams, so we do not expect to get any thread_limit clause in the provided
4124+
// directive. Once we support that, this assertion can be replaced by the
4125+
// actual emission of the clause expression.
4126+
assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
4127+
"Not expecting clause in directive.");
4128+
4129+
// If the current target region has a teams region enclosed, we need to get
4130+
// the thread limit to pass to the runtime function call. This is done
4131+
// by generating the expression in a inlined region. This is required because
4132+
// the expression is captured in the enclosing target environment when the
4133+
// teams directive is not combined with target.
4134+
4135+
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
4136+
4137+
// FIXME: Accommodate other combined directives with teams when they become
4138+
// available.
4139+
if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
4140+
if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
4141+
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
4142+
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
4143+
llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
4144+
return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
4145+
/*IsSigned=*/true);
4146+
}
4147+
4148+
// If we have an enclosed teams directive but no thread_limit clause we use
4149+
// the default value 0.
4150+
return CGF.Builder.getInt32(0);
4151+
}
4152+
4153+
// No teams associated with the directive.
4154+
return nullptr;
4155+
}
4156+
39754157
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
39764158
const OMPExecutableDirective &D,
39774159
llvm::Value *OutlinedFn,
@@ -4100,7 +4282,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
41004282
// Fill up the pointer arrays and transfer execution to the device.
41014283
auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
41024284
hasVLACaptures, Device, OutlinedFnID, OffloadError,
4103-
OffloadErrorQType](CodeGenFunction &CGF) {
4285+
OffloadErrorQType, &D](CodeGenFunction &CGF) {
41044286
unsigned PointerNumVal = BasePointers.size();
41054287
llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
41064288
llvm::Value *BasePointersArray;
@@ -4240,11 +4422,34 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
42404422
else
42414423
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
42424424

4243-
llvm::Value *OffloadingArgs[] = {
4244-
DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
4245-
PointersArray, SizesArray, MapTypesArray};
4246-
auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
4247-
OffloadingArgs);
4425+
// Return value of the runtime offloading call.
4426+
llvm::Value *Return;
4427+
4428+
auto *NumTeams = emitNumTeamsClauseForTargetDirective(*this, CGF, D);
4429+
auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(*this, CGF, D);
4430+
4431+
// If we have NumTeams defined this means that we have an enclosed teams
4432+
// region. Therefore we also expect to have ThreadLimit defined. These two
4433+
// values should be defined in the presence of a teams directive, regardless
4434+
// of having any clauses associated. If the user is using teams but no
4435+
// clauses, these two values will be the default that should be passed to
4436+
// the runtime library - a 32-bit integer with the value zero.
4437+
if (NumTeams) {
4438+
assert(ThreadLimit && "Thread limit expression should be available along "
4439+
"with number of teams.");
4440+
llvm::Value *OffloadingArgs[] = {
4441+
DeviceID, OutlinedFnID, PointerNum,
4442+
BasePointersArray, PointersArray, SizesArray,
4443+
MapTypesArray, NumTeams, ThreadLimit};
4444+
Return = CGF.EmitRuntimeCall(
4445+
createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
4446+
} else {
4447+
llvm::Value *OffloadingArgs[] = {
4448+
DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
4449+
PointersArray, SizesArray, MapTypesArray};
4450+
Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
4451+
OffloadingArgs);
4452+
}
42484453

42494454
CGF.EmitStoreOfScalar(Return, OffloadError);
42504455
};

‎clang/lib/CodeGen/CGStmtOpenMP.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -2716,8 +2716,12 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
27162716
CapturedVars);
27172717
}
27182718

2719-
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
2720-
llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
2719+
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
2720+
OMPLexicalScope Scope(*this, S);
2721+
const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
2722+
2723+
// FIXME: We should fork teams here instead of just emit the statement.
2724+
EmitStmt(CS.getCapturedStmt());
27212725
}
27222726

27232727
void CodeGenFunction::EmitOMPCancellationPointDirective(

‎clang/test/OpenMP/teams_codegen.cpp

+210
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
// expected-no-diagnostics
2+
#ifndef HEADER
3+
#define HEADER
4+
// Test host codegen.
5+
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
6+
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
7+
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
8+
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
9+
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
10+
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
11+
#ifdef CK1
12+
13+
int Gbla;
14+
long long Gblb;
15+
int &Gblc = Gbla;
16+
17+
// CK1-LABEL: teams_argument_global_local
18+
int teams_argument_global_local(int a){
19+
int comp = 1;
20+
21+
int la = 23;
22+
float lc = 25.0;
23+
24+
// CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
25+
// CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
26+
#pragma omp target
27+
#pragma omp teams
28+
{
29+
++comp;
30+
}
31+
32+
// CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
33+
// CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
34+
35+
// CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
36+
#pragma omp target
37+
#pragma omp teams num_teams(la)
38+
{
39+
++comp;
40+
}
41+
42+
// CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
43+
// CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
44+
45+
// CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
46+
#pragma omp target
47+
#pragma omp teams thread_limit(la)
48+
{
49+
++comp;
50+
}
51+
52+
// CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
53+
54+
// CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
55+
// CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
56+
// CK1-DAG: [[NTB]] = load i32, i32* %{{.+}},
57+
58+
// CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
59+
// CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
60+
// CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
61+
// CK1-DAG: [[TLD]] = load float, float* %{{.+}},
62+
// CK1-DAG: [[TLB]] = load i64, i64* @Gblb,
63+
64+
// CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
65+
#pragma omp target
66+
#pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
67+
{
68+
++comp;
69+
}
70+
71+
// CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
72+
73+
// CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], 1
74+
// CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
75+
76+
// CK1-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 2
77+
// CK1-DAG: [[TLA]] = load i32, i32* @Gbla,
78+
79+
// CK1: call void @{{.+}}(i{{.+}} {{.+}}
80+
#pragma omp target
81+
#pragma omp teams num_teams(Gblc+1) thread_limit(Gblc+2)
82+
{
83+
comp += Gblc;
84+
}
85+
86+
return comp;
87+
}
88+
89+
#endif // CK1
90+
91+
// Test host codegen.
92+
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
93+
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
94+
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
95+
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
96+
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
97+
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
98+
#ifdef CK2
99+
100+
// CK2-DAG: [[SSI:%.+]] = type { i32, float }
101+
// CK2-DAG: [[SSL:%.+]] = type { i64, float }
102+
template <typename T>
103+
struct SS{
104+
T a;
105+
float b;
106+
};
107+
108+
SS<int> Gbla;
109+
SS<long long> Gblb;
110+
111+
// CK2-LABEL: teams_template_arg
112+
int teams_template_arg(void) {
113+
int comp = 1;
114+
115+
SS<int> la;
116+
SS<long long> lb;
117+
118+
// CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
119+
120+
// CK2-DAG: [[NT]] = load i32, i32* getelementptr inbounds ([[SSI]], [[SSI]]* @Gbla, i32 0, i32 0)
121+
122+
// CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
123+
// CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64
124+
// CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
125+
// CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1
126+
127+
// CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
128+
#pragma omp target
129+
#pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b)
130+
{
131+
++comp;
132+
}
133+
134+
// CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
135+
136+
// CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32
137+
// CK2-DAG: [[TLD]] = load i64, i64* getelementptr inbounds ([[SSL]], [[SSL]]* @Gblb, i32 0, i32 0),
138+
139+
// CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32
140+
// CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64
141+
// CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]],
142+
// CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1
143+
144+
// CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
145+
#pragma omp target
146+
#pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a)
147+
{
148+
++comp;
149+
}
150+
return comp;
151+
}
152+
#endif // CK2
153+
154+
// Test host codegen.
155+
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
156+
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
157+
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
158+
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
159+
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
160+
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
161+
#ifdef CK3
162+
163+
// CK3: [[SSI:%.+]] = type { i32, float }
164+
// CK3-LABEL: teams_template_struct
165+
166+
template <typename T, int X, long long Y>
167+
struct SS{
168+
T a;
169+
float b;
170+
171+
int foo(void) {
172+
int comp = 1;
173+
174+
// CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
175+
176+
// CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
177+
// CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
178+
// CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
179+
180+
// CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
181+
#pragma omp target
182+
#pragma omp teams num_teams(a) thread_limit(X)
183+
{
184+
++comp;
185+
}
186+
187+
// CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
188+
189+
// CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123
190+
// CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32
191+
// CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
192+
// CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[THIS:%[^,]+]], i32 0, i32 1
193+
194+
// CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
195+
#pragma omp target
196+
#pragma omp teams num_teams(Y) thread_limit((int)b+X)
197+
{
198+
++comp;
199+
}
200+
return comp;
201+
}
202+
};
203+
204+
int teams_template_struct(void) {
205+
SS<int, 123, 456> V;
206+
return V.foo();
207+
208+
}
209+
#endif // CK3
210+
#endif

0 commit comments

Comments
 (0)
Please sign in to comment.