Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -481,6 +481,10 @@ // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t // *arg_types); OMPRTL__tgt_target, + // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); @@ -1153,6 +1157,24 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_teams: { + // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -3972,6 +3994,136 @@ DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); } +/// \brief Emit the num_teams clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no num_teams clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitNumTeamsClauseForTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + ArrayRef KernelArgs) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any num_teams clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the number of teams to pass to the runtime function call. This is done + // through a function that returns the value. This is required because the + // expression is captured in the enclosing target environment when the teams + // directive is not combined with target. + + CodeGenModule &CGM = CGF.CGM; + const CapturedStmt &CS = *cast(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast(CS.getCapturedStmt())) { + if (auto *NTE = TeamsDir->getSingleClause()) { + auto &&CodeGen = [NTE](CodeGenFunction &CGF) { + auto *V = CGF.EmitScalarExpr(NTE->getNumTeams()); + CGF.Builder.CreateRet( + CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); + CGF.EmitBlock(CGF.createBasicBlock()); + }; + + llvm::Function *AuxFn; + { + CodeGenFunction CGF(CGM, true); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, + ".omp_offload.get_num_teams"); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + + AuxFn = CGF.GenerateOpenMPCapturedStmtFunction( + CS, CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, + /*Signed=*/true)); + AuxFn->addFnAttr(llvm::Attribute::AlwaysInline); + } + assert(AuxFn && "Invalid auxiliar function!"); + return CGF.Builder.CreateCall(AuxFn, KernelArgs); + } + + // If we have an enclosed teams directive but no num_teams clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + +/// \brief Emit the thread_limit clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no thread_limit clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitThreadLimitClauseForTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + ArrayRef KernelArgs) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any thread_limit clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the number of teams to pass to the runtime function call. This is done + // through a function that returns the value. This is required because the + // expression is captured in the enclosing target environment when the teams + // directive is not combined with target. + + CodeGenModule &CGM = CGF.CGM; + const CapturedStmt &CS = *cast(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast(CS.getCapturedStmt())) { + if (auto *TLE = TeamsDir->getSingleClause()) { + auto &&CodeGen = [TLE](CodeGenFunction &CGF) { + auto *V = CGF.EmitScalarExpr(TLE->getThreadLimit()); + CGF.Builder.CreateRet( + CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); + CGF.EmitBlock(CGF.createBasicBlock()); + }; + + llvm::Function *AuxFn; + { + CodeGenFunction CGF(CGM, true); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, + ".omp_offload.get_thread_limit"); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + + AuxFn = CGF.GenerateOpenMPCapturedStmtFunction( + CS, CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, + /*Signed=*/true)); + AuxFn->addFnAttr(llvm::Attribute::AlwaysInline); + } + assert(AuxFn && "Invalid auxiliar function!"); + return CGF.Builder.CreateCall(AuxFn, KernelArgs); + } + + // If we have an enclosed teams directive but no thread_limit clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, @@ -4100,7 +4252,7 @@ // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, hasVLACaptures, Device, OutlinedFnID, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { + OffloadErrorQType, &D](CodeGenFunction &CGF) { unsigned PointerNumVal = BasePointers.size(); llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); llvm::Value *BasePointersArray; @@ -4240,11 +4392,29 @@ else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, PointerNum, BasePointersArray, - PointersArray, SizesArray, MapTypesArray}; - auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + // Return value of the runtime offloading call. + llvm::Value *Return; + + auto *NumTeams = emitNumTeamsClauseForTargetDirective(CGF, D, BasePointers); + auto *ThreadLimit = + emitThreadLimitClauseForTargetDirective(CGF, D, BasePointers); + + if (NumTeams) { + assert(ThreadLimit && "Thread limit expression should be available along " + "with number of teams."); + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumTeams, ThreadLimit}; + Return = CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + } else { + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, BasePointersArray, + PointersArray, SizesArray, MapTypesArray}; + Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), + OffloadingArgs); + } CGF.EmitStoreOfScalar(Return, OffloadError); }; Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -131,6 +131,12 @@ llvm::Function * CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { + return GenerateOpenMPCapturedStmtFunction(S, getContext().VoidTy); +} + +llvm::Function * +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, + QualType ReturnQTy) { assert( CapturedStmtInfo && "CapturedStmtInfo should be set when generating the captured function"); @@ -180,7 +186,7 @@ // Create the function declaration. FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, + CGM.getTypes().arrangeFreeFunctionDeclaration(ReturnQTy, Args, ExtInfo, /*IsVariadic=*/false); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); @@ -192,7 +198,7 @@ F->addFnAttr(llvm::Attribute::NoUnwind); // Generate the function. - StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + StartFunction(CD, ReturnQTy, F, FuncInfo, Args, CD->getLocation(), CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); I = S.captures().begin(); @@ -2680,8 +2686,12 @@ CapturedVars); } -void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { - llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); +void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { + LexicalScope Scope(*this, S.getSourceRange()); + const CapturedStmt &CS = *cast(S.getAssociatedStmt()); + + // FIXME: We should fork teams here instead of just emit the statement. + EmitStmt(CS.getCapturedStmt()); } void CodeGenFunction::EmitOMPCancellationPointDirective( Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -2216,6 +2216,8 @@ llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); Address GenerateCapturedStmtArgument(const CapturedStmt &S); + llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, + QualType ReturnQTy); llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S); void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl &CapturedVars); Index: test/OpenMP/teams_codegen.cpp =================================================================== --- /dev/null +++ test/OpenMP/teams_codegen.cpp @@ -0,0 +1,211 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +int Gbla; +long long Gblb; + +// CK1-LABEL: teams_argument_global_local +int teams_argument_global_local(int a){ + int comp = 1; + + int la = 23; + float lc = 25.0; + + // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0) + // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32* + // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]], + // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]], + // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32* + // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]], + // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}}, + + + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams num_teams(la) + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]]) + // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32* + // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]], + // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]], + // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32* + // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]], + // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}}, + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams thread_limit(la) + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]] + // CK1-64-DAG: [[NTB]] = load i32, i32* %c{{.+}}, + // CK1-64-DAG: [[NTA]] = load i32, i32* %c{{.+}}, + + // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32 + // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]] + // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64 + // CK1-DAG: [[TLD]] = load float, float* %{{.+}}, + // CK1-DAG: [[TLB]] = load i64, i64* %{{.+}}, + + // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc) + { + ++comp; + } + + return comp; +} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2-DAG: [[SSI:%.+]] = type { i32, float } +// CK2-DAG: [[SSL:%.+]] = type { i64, float } +template +struct SS{ + T a; + float b; +}; + +SS Gbla; +SS Gblb; + +// CK2-LABEL: teams_template_arg +int teams_template_arg(void) { + int comp = 1; + + SS la; + SS lb; + + // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK2-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK2-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0 + // CK2-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}}, + + // CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32 + // CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64 + // CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]], + // CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1 + + // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b) + { + ++comp; + } + + // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32 + // CK2-DAG: [[TLD]] = load i64, i64* [[TLA:%[^,]+]], + // CK2-DAG: [[TLA]] = getelementptr inbounds [[SSL]], [[SSL]]* [[TLB:%[^,]+]], i32 0, i32 0 + // CK2-DAG: [[TLB]] = load [[SSL]]*, [[SSL]]** %{{.+}}, + + // CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32 + // CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64 + // CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]], + // CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1 + + // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a) + { + ++comp; + } + return comp; +} +#endif // CK2 + +// Test host codegen. +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3: [[SSI:%.+]] = type { i32, float } +// CK3-LABEL: teams_template_struct + +template +struct SS{ + T a; + float b; + + int foo(void) { + int comp = 1; + + // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123) + + // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0 + // CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}}, + + // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(a) thread_limit(X) + { + ++comp; + } + + // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]]) + + // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123 + // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32 + // CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]], + // CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[TLD:%[^,]+]], i32 0, i32 1 + // CK3-DAG: [[TLD]] = load [[SSI]]*, [[SSI]]** %{{.+}}, + + // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Y) thread_limit((int)b+X) + { + ++comp; + } + return comp; + } +}; + +int teams_template_struct(void) { + SS V; + return V.foo(); + +} +#endif // CK3 +#endif