Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -166,6 +166,10 @@ // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t // *arg_types); OMPRTL__tgt_target, + // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); @@ -925,6 +929,10 @@ /// \param ParentName Name of the function that encloses the target region. /// \param OutlinedFn Outlined function value to be defined by this call. /// \param OutlinedFnID Outlined function ID value to be defined by this call. + /// \param NestedNumTeamsFn Function that returns the number of teams + /// associated with a nested temas directive. + /// \param NestedThreadLimitFn Function that returns the thread + /// limitassociated with a nested temas directive. /// \param IsOffloadEntry True if the outlined function is an offload entry. /// An oulined function may not be an entry if, e.g. the if clause always /// evaluates to false. @@ -932,6 +940,8 @@ StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, + llvm::Function *&NestedNumTeamsFn, + llvm::Function *&NestedThreadLimitFn, bool IsOffloadEntry); /// \brief Emit the target offloading code associated with \a D. The emitted @@ -940,17 +950,21 @@ /// \param D Directive to emit. /// \param OutlinedFn Host version of the code to be offloaded. /// \param OutlinedFnID ID of host version of the code to be offloaded. + /// \param NestedNumTeamsFn Function that return the number of teams of a + /// nested teams region, if any. + /// \param NestedThreadLimitFn Function that return the thread limit of a + /// nested teams region, if any. /// \param IfCond Expression evaluated in if clause associated with the target /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. /// \param CapturedVars Values captured in the current region. - virtual void emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, - llvm::Value *OutlinedFnID, const Expr *IfCond, - const Expr *Device, - ArrayRef CapturedVars); + virtual void + emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, + llvm::Function *NestedNumTeamsFn, + llvm::Function *NestedThreadLimitFn, const Expr *IfCond, + const Expr *Device, ArrayRef CapturedVars); /// \brief Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -935,6 +935,24 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_teams: { + // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -3717,6 +3735,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, + llvm::Function *&NestedNumTeamsFn, llvm::Function *&NestedThreadLimitFn, bool IsOffloadEntry) { assert(!ParentName.empty() && "Invalid target region parent name!"); @@ -3753,11 +3772,13 @@ << Column; } - CodeGenFunction CGF(CGM, true); - CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + { + CodeGenFunction CGF(CGM, true); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); + OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); + } // If this target outline function is not an offload entry, we don't need to // register it. @@ -3787,6 +3808,53 @@ // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID); + + // If the current target region has a teams region enclosed, we need to get + // the number of teams and thread limit to pass to the runtime function call + // later on. This is done through a function that returns the value. This is + // required because the expression is captured in the enclosing target + // environment when the teams directive is not combined with target. This only + // has to be done for the host. + // + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (!CGM.getLangOpts().OpenMPIsDevice) + if (auto *TeamsDir = dyn_cast(CS.getCapturedStmt())) { + if (auto *NTE = TeamsDir->getSingleClause()) { + auto &&CodeGen = [NTE](CodeGenFunction &CGF) { + auto *V = CGF.EmitScalarExpr(NTE->getNumTeams()); + CGF.Builder.CreateRet( + CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); + CGF.EmitBlock(CGF.createBasicBlock()); + }; + + CodeGenFunction CGF(CGM, true); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, + ".omp_offload.get_num_teams"); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + + NestedNumTeamsFn = + CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy); + NestedNumTeamsFn->addFnAttr(llvm::Attribute::AlwaysInline); + } + if (auto *TLE = TeamsDir->getSingleClause()) { + auto &&CodeGen = [TLE](CodeGenFunction &CGF) { + auto *V = CGF.EmitScalarExpr(TLE->getThreadLimit()); + CGF.Builder.CreateRet( + CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); + CGF.EmitBlock(CGF.createBasicBlock()); + }; + + CodeGenFunction CGF(CGM, true); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, + ".omp_offload.get_thread_limit"); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + + NestedThreadLimitFn = + CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy); + NestedThreadLimitFn->addFnAttr(llvm::Attribute::AlwaysInline); + } + } return; } @@ -3794,6 +3862,8 @@ const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, + llvm::Function *NestedNumTeamsFn, + llvm::Function *NestedThreadLimitFn, const Expr *IfCond, const Expr *Device, ArrayRef CapturedVars) { if (!CGF.HaveInsertPoint()) @@ -3917,8 +3987,9 @@ // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, - hasVLACaptures, Device, OutlinedFnID, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { + &CS, hasVLACaptures, Device, OutlinedFnID, OffloadError, + OffloadErrorQType, NestedNumTeamsFn, + NestedThreadLimitFn](CodeGenFunction &CGF) { unsigned PointerNumVal = BasePointers.size(); llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); llvm::Value *BasePointersArray; @@ -4059,11 +4130,53 @@ else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, PointerNum, BasePointersArray, - PointersArray, SizesArray, MapTypesArray}; - auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + // Return value of the runtime offloading call. + llvm::Value *Return; + + // If the target region is associated with a teams region (not necessarily + // combined) we need to get the value with the number of teams and thread + // limit. These expressions can be nested in the target region - in that + // case we rely on NestedNumTeamsFn and NestedThreadLimitFn to retrieve the + // right values. If teams is not nested (is in a combined directive with + // target) we can emit the expressions directly. + + // FIXME: We need to accommodate combined directives here when they become + // supported. + if (auto *TD = dyn_cast(CS.getCapturedStmt())) { + llvm::Value *NumTeams = nullptr; + llvm::Value *ThreadLimit = nullptr; + if (TD->getSingleClause()) { + assert(NestedNumTeamsFn && "Helper function is required to get the " + "number of teams of an enclosed teams " + "directive."); + NumTeams = CGF.Builder.CreateCall(NestedNumTeamsFn, BasePointers); + } else + NumTeams = CGF.Builder.getInt32(0); + if (TD->getSingleClause()) { + assert(NestedThreadLimitFn && "Helper function is required to get the " + "thread limit of an enclosed teams " + "directive."); + ThreadLimit = CGF.Builder.CreateCall(NestedThreadLimitFn, BasePointers); + } else + ThreadLimit = CGF.Builder.getInt32(0); + + assert(NumTeams && ThreadLimit && "Thread limit and number of teams " + "should be defined for a teams " + "region."); + + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumTeams, ThreadLimit}; + Return = CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + } else { + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, BasePointersArray, + PointersArray, SizesArray, MapTypesArray}; + Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), + OffloadingArgs); + } CGF.EmitStoreOfScalar(Return, OffloadError); }; @@ -4133,7 +4246,9 @@ llvm::Function *Fn; llvm::Constant *Addr; - emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, + llvm::Function *NTFn; + llvm::Function *TLFn; + emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, NTFn, TLFn, /*isOffloadEntry=*/true); assert(Fn && Addr && "Target region emission failed."); return; Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -91,6 +91,12 @@ llvm::Function * CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { + return GenerateOpenMPCapturedStmtFunction(S, getContext().VoidTy); +} + +llvm::Function * +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, + QualType ReturnQTy) { assert( CapturedStmtInfo && "CapturedStmtInfo should be set when generating the captured function"); @@ -140,7 +146,7 @@ // Create the function declaration. FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, + CGM.getTypes().arrangeFreeFunctionDeclaration(ReturnQTy, Args, ExtInfo, /*IsVariadic=*/false); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); @@ -152,7 +158,7 @@ F->addFnAttr(llvm::Attribute::NoUnwind); // Generate the function. - StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + StartFunction(CD, ReturnQTy, F, FuncInfo, Args, CD->getLocation(), CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); I = S.captures().begin(); @@ -2635,6 +2641,8 @@ llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; + llvm::Function *NestedNumTeamsFn = nullptr; + llvm::Function *NestedThreadLimitFn = nullptr; // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; @@ -2673,15 +2681,21 @@ ParentName = CGM.getMangledName(GlobalDecl(cast(CurFuncDecl))); - CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, - IsOffloadEntry); + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, FnID, NestedNumTeamsFn, NestedThreadLimitFn, + IsOffloadEntry); - CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, + CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, NestedNumTeamsFn, + NestedThreadLimitFn, IfCond, Device, CapturedVars); } -void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { - llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); +void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { + LexicalScope Scope(*this, S.getSourceRange()); + const CapturedStmt &CS = *cast(S.getAssociatedStmt()); + + // FIXME: We should fork teams here instead of just emit the statement. + EmitStmt(CS.getCapturedStmt()); } void CodeGenFunction::EmitOMPCancellationPointDirective( Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -2216,6 +2216,8 @@ llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); Address GenerateCapturedStmtArgument(const CapturedStmt &S); + llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, + QualType ReturnQTy); llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S); void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl &CapturedVars); Index: test/OpenMP/teams_codegen.cpp =================================================================== --- /dev/null +++ test/OpenMP/teams_codegen.cpp @@ -0,0 +1,211 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +int Gbla; +long long Gblb; + +// CK1-LABEL: teams_argument_global_local +int teams_argument_global_local(int a){ + int comp = 1; + + int la = 23; + float lc = 25.0; + + // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0) + // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32* + // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]], + // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]], + // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32* + // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]], + // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}}, + + + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams num_teams(la) + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]]) + // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32* + // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]], + // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]], + // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32* + // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]], + // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}}, + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams thread_limit(la) + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]] + // CK1-64-DAG: [[NTB]] = load i32, i32* %c{{.+}}, + // CK1-64-DAG: [[NTA]] = load i32, i32* %c{{.+}}, + + // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32 + // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]] + // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64 + // CK1-DAG: [[TLD]] = load float, float* %{{.+}}, + // CK1-DAG: [[TLB]] = load i64, i64* %{{.+}}, + + // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc) + { + ++comp; + } + + return comp; +} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2-DAG: [[SSI:%.+]] = type { i32, float } +// CK2-DAG: [[SSL:%.+]] = type { i64, float } +template +struct SS{ + T a; + float b; +}; + +SS Gbla; +SS Gblb; + +// CK2-LABEL: teams_template_arg +int teams_template_arg(void) { + int comp = 1; + + SS la; + SS lb; + + // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK2-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK2-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0 + // CK2-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}}, + + // CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32 + // CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64 + // CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]], + // CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1 + + // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b) + { + ++comp; + } + + // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32 + // CK2-DAG: [[TLD]] = load i64, i64* [[TLA:%[^,]+]], + // CK2-DAG: [[TLA]] = getelementptr inbounds [[SSL]], [[SSL]]* [[TLB:%[^,]+]], i32 0, i32 0 + // CK2-DAG: [[TLB]] = load [[SSL]]*, [[SSL]]** %{{.+}}, + + // CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32 + // CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64 + // CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]], + // CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1 + + // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a) + { + ++comp; + } + return comp; +} +#endif // CK2 + +// Test host codegen. +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3: [[SSI:%.+]] = type { i32, float } +// CK3-LABEL: teams_template_struct + +template +struct SS{ + T a; + float b; + + int foo(void) { + int comp = 1; + + // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123) + + // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0 + // CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}}, + + // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(a) thread_limit(X) + { + ++comp; + } + + // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]]) + + // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123 + // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32 + // CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]], + // CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[TLD:%[^,]+]], i32 0, i32 1 + // CK3-DAG: [[TLD]] = load [[SSI]]*, [[SSI]]** %{{.+}}, + + // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Y) thread_limit((int)b+X) + { + ++comp; + } + return comp; + } +}; + +int teams_template_struct(void) { + SS V; + return V.foo(); + +} +#endif // CK3 +#endif