Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -34,6 +34,7 @@ } // namespace llvm namespace clang { +class CapturedStmt; class Expr; class GlobalDecl; class OMPExecutableDirective; @@ -661,10 +662,14 @@ /// \param CodeGen Code generation sequence for the \a D directive. /// \param HasCancel true if region has inner cancel directive, false /// otherwise. + /// \param CS the captured statement associated with the region. It is only + /// required if the local variables information of the enclosing function may + /// not match with the captured variables. virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool HasCancel = false); + bool HasCancel = false, + const CapturedStmt *CS = nullptr); /// \brief Emit a code for reduction clause. Next code should be emitted for /// reduction: /// \code Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -255,21 +255,38 @@ /// \brief RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; + /// \brief Saves the varaibles that were forced to be local in the current + /// inlined region. + SmallVector ForcedLocalVars; public: - /// \brief Constructs region for combined constructs. + /// \brief Constructs inlined region. Mostly used for combined constructs. If + /// a captured statement is provided it also ensures the captured variables + /// are all defined in the scope of the enclosing function. This is typical + /// used for regions that make local instances of global variables, e.g. + /// target regions. /// \param CodeGen Code generation sequence for combined directives. Includes /// a list of functions used for code generation of implicitly inlined /// regions. InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) + OpenMPDirectiveKind Kind, bool HasCancel, + const CapturedStmt *CS) : CGF(CGF) { // Start emission for the construct. CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); + + // Ensures that all the captures are local in the current inlined region. + if (CS) + CGF.StartOpenMPInlinedCapturedRegion(*CS, ForcedLocalVars); } ~InlinedOpenMPRegionRAII() { + // Restore the local variable information if we have anything forced in this + // inlined region. + if (!ForcedLocalVars.empty()) + CGF.CloseOpenMPInlinedCapturedRegion(ForcedLocalVars); + // Restore original CapturedStmtInfo only if we're done with code emission. auto *OldCSI = cast(CGF.CapturedStmtInfo)->getOldCSI(); @@ -481,6 +498,10 @@ // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t // *arg_types); OMPRTL__tgt_target, + // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); @@ -1153,6 +1174,24 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_teams: { + // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -3766,10 +3805,11 @@ void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnerKind, const RegionCodeGenTy &CodeGen, - bool HasCancel) { + bool HasCancel, + const CapturedStmt *CS) { if (!CGF.HaveInsertPoint()) return; - InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); + InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, CS); CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); } @@ -3972,6 +4012,114 @@ DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); } +/// \brief Emit the num_teams clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no num_teams clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any num_teams clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the number of teams to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast(CS.getCapturedStmt())) { + if (auto *NTE = TeamsDir->getSingleClause()) { + llvm::Value *NumTeams = nullptr; + auto &&CodeGen = [NTE, &NumTeams](CodeGenFunction &CGF) { + NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); + NumTeams = + CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, /*IsSigned=*/true); + }; + + OMPRuntime.emitInlinedDirective(CGF, OMPD_target, CodeGen, + /*HasCancel=*/false, &CS); + assert(NumTeams && "Invalid number of teams value."); + return NumTeams; + } + + // If we have an enclosed teams directive but no num_teams clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + +/// \brief Emit the thread_limit clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no thread_limit clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any thread_limit clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the thread limit to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast(CS.getCapturedStmt())) { + if (auto *TLE = TeamsDir->getSingleClause()) { + llvm::Value *ThreadLimit = nullptr; + auto &&CodeGen = [TLE, &ThreadLimit](CodeGenFunction &CGF) { + ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); + ThreadLimit = CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, + /*IsSigned=*/true); + }; + + OMPRuntime.emitInlinedDirective(CGF, OMPD_target, CodeGen, + /*HasCancel=*/false, &CS); + assert(ThreadLimit && "Invalid number of teams value."); + return ThreadLimit; + } + + // If we have an enclosed teams directive but no thread_limit clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, @@ -4100,7 +4248,7 @@ // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, hasVLACaptures, Device, OutlinedFnID, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { + OffloadErrorQType, &D](CodeGenFunction &CGF) { unsigned PointerNumVal = BasePointers.size(); llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); llvm::Value *BasePointersArray; @@ -4240,11 +4388,28 @@ else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, PointerNum, BasePointersArray, - PointersArray, SizesArray, MapTypesArray}; - auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + // Return value of the runtime offloading call. + llvm::Value *Return; + + auto *NumTeams = emitNumTeamsClauseForTargetDirective(*this, CGF, D); + auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(*this, CGF, D); + + if (NumTeams) { + assert(ThreadLimit && "Thread limit expression should be available along " + "with number of teams."); + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumTeams, ThreadLimit}; + Return = CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + } else { + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, BasePointersArray, + PointersArray, SizesArray, MapTypesArray}; + Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), + OffloadingArgs); + } CGF.EmitStoreOfScalar(Return, OffloadError); }; Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -239,6 +239,39 @@ return F; } +/// \brief Start an OpenMP inlined region by forcing all the variables captured +/// to be defined as local variables at the current scope. This is required for +/// regions that capture global variables that have to be emitted as inlined +/// regions. +void CodeGenFunction::StartOpenMPInlinedCapturedRegion( + const CapturedStmt &S, SmallVector &ForcedLocalVars) { + auto I = S.capture_init_begin(); + auto CI = S.capture_begin(); + auto CE = S.capture_end(); + for (; CI != CE; ++CI, ++I) { + if (!CI->capturesVariable() && !CI->capturesVariableByCopy()) + continue; + + auto FoundDecl = LocalDeclMap.find(CI->getCapturedVar()); + // If it already exist, we do not need to force it. + if (FoundDecl != LocalDeclMap.end()) + continue; + + setAddrOfLocalVar(CI->getCapturedVar(), EmitLValue(*I).getAddress()); + } +} + +/// \brief Close an OpenMP inlined region by restoring the local variable +/// information that existed when the region was started. +void CodeGenFunction::CloseOpenMPInlinedCapturedRegion( + ArrayRef ForcedLocalVars) { + for (auto *VD : ForcedLocalVars) { + assert(LocalDeclMap.find(VD) != LocalDeclMap.end() && + "Variable expected to exist in the local declarations map."); + LocalDeclMap.erase(VD); + } +} + //===----------------------------------------------------------------------===// // OpenMP Directive Emission //===----------------------------------------------------------------------===// @@ -2667,8 +2700,12 @@ CapturedVars); } -void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { - llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); +void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { + LexicalScope Scope(*this, S.getSourceRange()); + const CapturedStmt &CS = *cast(S.getAssociatedStmt()); + + // FIXME: We should fork teams here instead of just emit the statement. + EmitStmt(CS.getCapturedStmt()); } void CodeGenFunction::EmitOMPCancellationPointDirective( Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -2223,6 +2223,16 @@ llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); Address GenerateCapturedStmtArgument(const CapturedStmt &S); llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S); + /// \brief Start an OpenMP inlined region by forcing all the variables + /// captured to be defined as local variables at the current scope. This is + /// required for regions that capture global variables that have to be emitted + /// as inlined regions. + void StartOpenMPInlinedCapturedRegion( + const CapturedStmt &S, SmallVector &ForcedLocalVars); + /// \brief Close an OpenMP inlined region by restoring the local variable + /// information that existed when the region was started. + void + CloseOpenMPInlinedCapturedRegion(ArrayRef ForcedLocalVars); void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl &CapturedVars); void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy, Index: test/OpenMP/teams_codegen.cpp =================================================================== --- /dev/null +++ test/OpenMP/teams_codegen.cpp @@ -0,0 +1,194 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +int Gbla; +long long Gblb; + +// CK1-LABEL: teams_argument_global_local +int teams_argument_global_local(int a){ + int comp = 1; + + int la = 23; + float lc = 25.0; + + // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0) + // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams num_teams(la) + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]]) + // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams thread_limit(la) + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]] + // CK1-DAG: [[NTA]] = load i32, i32* @Gbla, + // CK1-DAG: [[NTB]] = load i32, i32* %{{.+}}, + + // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32 + // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]] + // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64 + // CK1-DAG: [[TLD]] = load float, float* %{{.+}}, + // CK1-DAG: [[TLB]] = load i64, i64* @Gblb, + + // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc) + { + ++comp; + } + + return comp; +} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2-DAG: [[SSI:%.+]] = type { i32, float } +// CK2-DAG: [[SSL:%.+]] = type { i64, float } +template +struct SS{ + T a; + float b; +}; + +SS Gbla; +SS Gblb; + +// CK2-LABEL: teams_template_arg +int teams_template_arg(void) { + int comp = 1; + + SS la; + SS lb; + + // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK2-DAG: [[NT]] = load i32, i32* getelementptr inbounds ([[SSI]], [[SSI]]* @Gbla, i32 0, i32 0) + + // CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32 + // CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64 + // CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]], + // CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1 + + // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b) + { + ++comp; + } + + // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32 + // CK2-DAG: [[TLD]] = load i64, i64* getelementptr inbounds ([[SSL]], [[SSL]]* @Gblb, i32 0, i32 0), + + // CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32 + // CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64 + // CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]], + // CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1 + + // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a) + { + ++comp; + } + return comp; +} +#endif // CK2 + +// Test host codegen. +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3: [[SSI:%.+]] = type { i32, float } +// CK3-LABEL: teams_template_struct + +template +struct SS{ + T a; + float b; + + int foo(void) { + int comp = 1; + + // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123) + + // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0 + // CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}}, + + // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(a) thread_limit(X) + { + ++comp; + } + + // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]]) + + // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123 + // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32 + // CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]], + // CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[THIS:%[^,]+]], i32 0, i32 1 + + // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Y) thread_limit((int)b+X) + { + ++comp; + } + return comp; + } +}; + +int teams_template_struct(void) { + SS V; + return V.foo(); + +} +#endif // CK3 +#endif