@@ -252,6 +252,70 @@ class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
252
252
StringRef HelperName;
253
253
};
254
254
255
+ static void EmptyCodeGen (CodeGenFunction &) {
256
+ llvm_unreachable (" No codegen for expressions" );
257
+ }
258
+ // / \brief API for generation of expressions captured in a innermost OpenMP
259
+ // / region.
260
+ class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo {
261
+ public:
262
+ CGOpenMPInnerExprInfo (CodeGenFunction &CGF, const CapturedStmt &CS)
263
+ : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
264
+ OMPD_unknown,
265
+ /* HasCancel=*/ false ),
266
+ PrivScope (CGF) {
267
+ // Make sure the globals captured in the provided statement are local by
268
+ // using the privatization logic. We assume the same variable is not
269
+ // captured more than once.
270
+ for (auto &C : CS.captures ()) {
271
+ if (!C.capturesVariable () && !C.capturesVariableByCopy ())
272
+ continue ;
273
+
274
+ const VarDecl *VD = C.getCapturedVar ();
275
+ if (VD->isLocalVarDeclOrParm ())
276
+ continue ;
277
+
278
+ DeclRefExpr DRE (const_cast <VarDecl *>(VD),
279
+ /* RefersToEnclosingVariableOrCapture=*/ false ,
280
+ VD->getType ().getNonReferenceType (), VK_LValue,
281
+ SourceLocation ());
282
+ PrivScope.addPrivate (VD, [&CGF, &DRE]() -> Address {
283
+ return CGF.EmitLValue (&DRE).getAddress ();
284
+ });
285
+ }
286
+ (void )PrivScope.Privatize ();
287
+ }
288
+
289
+ // / \brief Lookup the captured field decl for a variable.
290
+ const FieldDecl *lookup (const VarDecl *VD) const override {
291
+ if (auto *FD = CGOpenMPInlinedRegionInfo::lookup (VD))
292
+ return FD;
293
+ return nullptr ;
294
+ }
295
+
296
+ // / \brief Emit the captured statement body.
297
+ void EmitBody (CodeGenFunction &CGF, const Stmt *S) override {
298
+ llvm_unreachable (" No body for expressions" );
299
+ }
300
+
301
+ // / \brief Get a variable or parameter for storing global thread id
302
+ // / inside OpenMP construct.
303
+ const VarDecl *getThreadIDVariable () const override {
304
+ llvm_unreachable (" No thread id for expressions" );
305
+ }
306
+
307
+ // / \brief Get the name of the capture helper.
308
+ StringRef getHelperName () const override {
309
+ llvm_unreachable (" No helper name for expressions" );
310
+ }
311
+
312
+ static bool classof (const CGCapturedStmtInfo *Info) { return false ; }
313
+
314
+ private:
315
+ // / Private scope to capture global variables.
316
+ CodeGenFunction::OMPPrivateScope PrivScope;
317
+ };
318
+
255
319
// / \brief RAII for emitting code of OpenMP constructs.
256
320
class InlinedOpenMPRegionRAII {
257
321
CodeGenFunction &CGF;
@@ -481,6 +545,10 @@ enum OpenMPRTLFunction {
481
545
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
482
546
// *arg_types);
483
547
OMPRTL__tgt_target,
548
+ // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
549
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
550
+ // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
551
+ OMPRTL__tgt_target_teams,
484
552
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
485
553
OMPRTL__tgt_register_lib,
486
554
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
@@ -1153,6 +1221,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1153
1221
RTLFn = CGM.CreateRuntimeFunction (FnTy, " __tgt_target" );
1154
1222
break ;
1155
1223
}
1224
+ case OMPRTL__tgt_target_teams: {
1225
+ // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
1226
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
1227
+ // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
1228
+ llvm::Type *TypeParams[] = {CGM.Int32Ty ,
1229
+ CGM.VoidPtrTy ,
1230
+ CGM.Int32Ty ,
1231
+ CGM.VoidPtrPtrTy ,
1232
+ CGM.VoidPtrPtrTy ,
1233
+ CGM.SizeTy ->getPointerTo (),
1234
+ CGM.Int32Ty ->getPointerTo (),
1235
+ CGM.Int32Ty ,
1236
+ CGM.Int32Ty };
1237
+ llvm::FunctionType *FnTy =
1238
+ llvm::FunctionType::get (CGM.Int32Ty , TypeParams, /* isVarArg*/ false );
1239
+ RTLFn = CGM.CreateRuntimeFunction (FnTy, " __tgt_target_teams" );
1240
+ break ;
1241
+ }
1156
1242
case OMPRTL__tgt_register_lib: {
1157
1243
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
1158
1244
QualType ParamTy =
@@ -3972,6 +4058,102 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction(
3972
4058
DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
3973
4059
}
3974
4060
4061
+ // / \brief Emit the num_teams clause of an enclosed teams directive at the
4062
+ // / target region scope. If there is no teams directive associated with the
4063
+ // / target directive, or if there is no num_teams clause associated with the
4064
+ // / enclosed teams directive, return nullptr.
4065
+ static llvm::Value *
4066
+ emitNumTeamsClauseForTargetDirective (CGOpenMPRuntime &OMPRuntime,
4067
+ CodeGenFunction &CGF,
4068
+ const OMPExecutableDirective &D) {
4069
+
4070
+ assert (!CGF.getLangOpts ().OpenMPIsDevice && " Clauses associated with the "
4071
+ " teams directive expected to be "
4072
+ " emitted only for the host!" );
4073
+
4074
+ // FIXME: For the moment we do not support combined directives with target and
4075
+ // teams, so we do not expect to get any num_teams clause in the provided
4076
+ // directive. Once we support that, this assertion can be replaced by the
4077
+ // actual emission of the clause expression.
4078
+ assert (D.getSingleClause <OMPNumTeamsClause>() == nullptr &&
4079
+ " Not expecting clause in directive." );
4080
+
4081
+ // If the current target region has a teams region enclosed, we need to get
4082
+ // the number of teams to pass to the runtime function call. This is done
4083
+ // by generating the expression in a inlined region. This is required because
4084
+ // the expression is captured in the enclosing target environment when the
4085
+ // teams directive is not combined with target.
4086
+
4087
+ const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt ());
4088
+
4089
+ // FIXME: Accommodate other combined directives with teams when they become
4090
+ // available.
4091
+ if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt ())) {
4092
+ if (auto *NTE = TeamsDir->getSingleClause <OMPNumTeamsClause>()) {
4093
+ CGOpenMPInnerExprInfo CGInfo (CGF, CS);
4094
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII (CGF, &CGInfo);
4095
+ llvm::Value *NumTeams = CGF.EmitScalarExpr (NTE->getNumTeams ());
4096
+ return CGF.Builder .CreateIntCast (NumTeams, CGF.Int32Ty ,
4097
+ /* IsSigned=*/ true );
4098
+ }
4099
+
4100
+ // If we have an enclosed teams directive but no num_teams clause we use
4101
+ // the default value 0.
4102
+ return CGF.Builder .getInt32 (0 );
4103
+ }
4104
+
4105
+ // No teams associated with the directive.
4106
+ return nullptr ;
4107
+ }
4108
+
4109
+ // / \brief Emit the thread_limit clause of an enclosed teams directive at the
4110
+ // / target region scope. If there is no teams directive associated with the
4111
+ // / target directive, or if there is no thread_limit clause associated with the
4112
+ // / enclosed teams directive, return nullptr.
4113
+ static llvm::Value *
4114
+ emitThreadLimitClauseForTargetDirective (CGOpenMPRuntime &OMPRuntime,
4115
+ CodeGenFunction &CGF,
4116
+ const OMPExecutableDirective &D) {
4117
+
4118
+ assert (!CGF.getLangOpts ().OpenMPIsDevice && " Clauses associated with the "
4119
+ " teams directive expected to be "
4120
+ " emitted only for the host!" );
4121
+
4122
+ // FIXME: For the moment we do not support combined directives with target and
4123
+ // teams, so we do not expect to get any thread_limit clause in the provided
4124
+ // directive. Once we support that, this assertion can be replaced by the
4125
+ // actual emission of the clause expression.
4126
+ assert (D.getSingleClause <OMPThreadLimitClause>() == nullptr &&
4127
+ " Not expecting clause in directive." );
4128
+
4129
+ // If the current target region has a teams region enclosed, we need to get
4130
+ // the thread limit to pass to the runtime function call. This is done
4131
+ // by generating the expression in a inlined region. This is required because
4132
+ // the expression is captured in the enclosing target environment when the
4133
+ // teams directive is not combined with target.
4134
+
4135
+ const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt ());
4136
+
4137
+ // FIXME: Accommodate other combined directives with teams when they become
4138
+ // available.
4139
+ if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt ())) {
4140
+ if (auto *TLE = TeamsDir->getSingleClause <OMPThreadLimitClause>()) {
4141
+ CGOpenMPInnerExprInfo CGInfo (CGF, CS);
4142
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII (CGF, &CGInfo);
4143
+ llvm::Value *ThreadLimit = CGF.EmitScalarExpr (TLE->getThreadLimit ());
4144
+ return CGF.Builder .CreateIntCast (ThreadLimit, CGF.Int32Ty ,
4145
+ /* IsSigned=*/ true );
4146
+ }
4147
+
4148
+ // If we have an enclosed teams directive but no thread_limit clause we use
4149
+ // the default value 0.
4150
+ return CGF.Builder .getInt32 (0 );
4151
+ }
4152
+
4153
+ // No teams associated with the directive.
4154
+ return nullptr ;
4155
+ }
4156
+
3975
4157
void CGOpenMPRuntime::emitTargetCall (CodeGenFunction &CGF,
3976
4158
const OMPExecutableDirective &D,
3977
4159
llvm::Value *OutlinedFn,
@@ -4100,7 +4282,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
4100
4282
// Fill up the pointer arrays and transfer execution to the device.
4101
4283
auto &&ThenGen = [this , &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
4102
4284
hasVLACaptures, Device, OutlinedFnID, OffloadError,
4103
- OffloadErrorQType](CodeGenFunction &CGF) {
4285
+ OffloadErrorQType, &D ](CodeGenFunction &CGF) {
4104
4286
unsigned PointerNumVal = BasePointers.size ();
4105
4287
llvm::Value *PointerNum = CGF.Builder .getInt32 (PointerNumVal);
4106
4288
llvm::Value *BasePointersArray;
@@ -4240,11 +4422,34 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
4240
4422
else
4241
4423
DeviceID = CGF.Builder .getInt32 (OMP_DEVICEID_UNDEF);
4242
4424
4243
- llvm::Value *OffloadingArgs[] = {
4244
- DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
4245
- PointersArray, SizesArray, MapTypesArray};
4246
- auto Return = CGF.EmitRuntimeCall (createRuntimeFunction (OMPRTL__tgt_target),
4247
- OffloadingArgs);
4425
+ // Return value of the runtime offloading call.
4426
+ llvm::Value *Return;
4427
+
4428
+ auto *NumTeams = emitNumTeamsClauseForTargetDirective (*this , CGF, D);
4429
+ auto *ThreadLimit = emitThreadLimitClauseForTargetDirective (*this , CGF, D);
4430
+
4431
+ // If we have NumTeams defined this means that we have an enclosed teams
4432
+ // region. Therefore we also expect to have ThreadLimit defined. These two
4433
+ // values should be defined in the presence of a teams directive, regardless
4434
+ // of having any clauses associated. If the user is using teams but no
4435
+ // clauses, these two values will be the default that should be passed to
4436
+ // the runtime library - a 32-bit integer with the value zero.
4437
+ if (NumTeams) {
4438
+ assert (ThreadLimit && " Thread limit expression should be available along "
4439
+ " with number of teams." );
4440
+ llvm::Value *OffloadingArgs[] = {
4441
+ DeviceID, OutlinedFnID, PointerNum,
4442
+ BasePointersArray, PointersArray, SizesArray,
4443
+ MapTypesArray, NumTeams, ThreadLimit};
4444
+ Return = CGF.EmitRuntimeCall (
4445
+ createRuntimeFunction (OMPRTL__tgt_target_teams), OffloadingArgs);
4446
+ } else {
4447
+ llvm::Value *OffloadingArgs[] = {
4448
+ DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
4449
+ PointersArray, SizesArray, MapTypesArray};
4450
+ Return = CGF.EmitRuntimeCall (createRuntimeFunction (OMPRTL__tgt_target),
4451
+ OffloadingArgs);
4452
+ }
4248
4453
4249
4454
CGF.EmitStoreOfScalar (Return, OffloadError);
4250
4455
};
0 commit comments