Skip to content

Commit 7971209

Browse files
author
Carlo Bertolli
committedFeb 28, 2018
[OpenMP] Extend NVPTX SPMD implementation of combined constructs
Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368
1 parent ec03d7e commit 7971209

16 files changed

+476
-52
lines changed
 

‎clang/include/clang/Basic/LangOptions.def

+1
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ LANGOPT(OpenMP , 32, 0, "OpenMP support and version of OpenMP (31, 40
197197
LANGOPT(OpenMPSimd , 1, 0, "Use SIMD only OpenMP support.")
198198
LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
199199
LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
200+
LANGOPT(OpenMPCUDAMode , 1, 0, "Generate code for OpenMP pragmas in SIMT/SPMD mode")
200201
LANGOPT(RenderScript , 1, 0, "RenderScript")
201202

202203
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")

‎clang/include/clang/Driver/Options.td

+2
Original file line numberDiff line numberDiff line change
@@ -1424,6 +1424,8 @@ def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">,
14241424
def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
14251425
HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
14261426
def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
1427+
def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
1428+
def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
14271429
def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
14281430
def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
14291431
def force__cpusubtype__ALL : Flag<["-"], "force_cpusubtype_ALL">;

‎clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

+12-21
Original file line numberDiff line numberDiff line change
@@ -271,21 +271,10 @@ bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
271271
}
272272

273273
static CGOpenMPRuntimeNVPTX::ExecutionMode
274-
getExecutionModeForDirective(CodeGenModule &CGM,
275-
const OMPExecutableDirective &D) {
276-
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
277-
switch (DirectiveKind) {
278-
case OMPD_target:
279-
case OMPD_target_teams:
280-
return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
281-
case OMPD_target_parallel:
282-
case OMPD_target_parallel_for:
283-
case OMPD_target_parallel_for_simd:
284-
return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
285-
default:
286-
llvm_unreachable("Unsupported directive on NVPTX device.");
287-
}
288-
llvm_unreachable("Unsupported directive on NVPTX device.");
274+
getExecutionMode(CodeGenModule &CGM) {
275+
return CGM.getLangOpts().OpenMPCUDAMode
276+
? CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd
277+
: CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
289278
}
290279

291280
void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D,
@@ -819,8 +808,7 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
819808

820809
assert(!ParentName.empty() && "Invalid target region parent name!");
821810

822-
CGOpenMPRuntimeNVPTX::ExecutionMode Mode =
823-
getExecutionModeForDirective(CGM, D);
811+
CGOpenMPRuntimeNVPTX::ExecutionMode Mode = getExecutionMode(CGM);
824812
switch (Mode) {
825813
case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic:
826814
emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
@@ -1051,10 +1039,13 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
10511039
// TODO: Do something with IfCond when support for the 'if' clause
10521040
// is added on Spmd target directives.
10531041
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1054-
OutlinedFnArgs.push_back(
1055-
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
1056-
OutlinedFnArgs.push_back(
1057-
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
1042+
1043+
Address ZeroAddr = CGF.CreateMemTemp(
1044+
CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
1045+
".zero.addr");
1046+
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1047+
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1048+
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
10581049
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
10591050
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
10601051
}

‎clang/lib/CodeGen/CGStmtOpenMP.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -4260,6 +4260,7 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
42604260
static void emitTargetTeamsDistributeParallelForRegion(
42614261
CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
42624262
PrePostActionTy &Action) {
4263+
Action.Enter(CGF);
42634264
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
42644265
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
42654266
S.getDistInc());
@@ -4310,6 +4311,7 @@ static void emitTargetTeamsDistributeParallelForSimdRegion(
43104311
CodeGenFunction &CGF,
43114312
const OMPTargetTeamsDistributeParallelForSimdDirective &S,
43124313
PrePostActionTy &Action) {
4314+
Action.Enter(CGF);
43134315
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
43144316
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
43154317
S.getDistInc());

‎clang/lib/Driver/ToolChains/Clang.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -3970,6 +3970,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
39703970
options::OPT_fnoopenmp_use_tls, /*Default=*/true))
39713971
CmdArgs.push_back("-fnoopenmp-use-tls");
39723972
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
3973+
3974+
// When in OpenMP offloading mode with NVPTX target, forward
3975+
// cuda-mode flag
3976+
Args.AddLastArg(CmdArgs, options::OPT_fopenmp_cuda_mode,
3977+
options::OPT_fno_openmp_cuda_mode);
39733978
break;
39743979
default:
39753980
// By default, if Clang doesn't know how to generate useful OpenMP code

‎clang/lib/Frontend/CompilerInvocation.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -2526,6 +2526,10 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
25262526
<< Opts.OMPHostIRFile;
25272527
}
25282528

2529+
// set CUDA mode for OpenMP target NVPTX if specified in options
2530+
Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() &&
2531+
Args.hasArg(options::OPT_fopenmp_cuda_mode);
2532+
25292533
// Record whether the __DEPRECATED define was requested.
25302534
Opts.Deprecated = Args.hasFlag(OPT_fdeprecated_macro,
25312535
OPT_fno_deprecated_macro,

‎clang/test/OpenMP/nvptx_target_parallel_codegen.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// Test target codegen - host bc file has to be created first.
2-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
2+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
77
// expected-no-diagnostics
88
#ifndef HEADER
99
#define HEADER
@@ -62,7 +62,7 @@ int bar(int n){
6262
// CHECK: br label {{%?}}[[EXEC:.+]]
6363
//
6464
// CHECK: [[EXEC]]
65-
// CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null, i16* [[AA]])
65+
// CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]])
6666
// CHECK: br label {{%?}}[[DONE:.+]]
6767
//
6868
// CHECK: [[DONE]]
@@ -104,7 +104,7 @@ int bar(int n){
104104
// CHECK: br label {{%?}}[[EXEC:.+]]
105105
//
106106
// CHECK: [[EXEC]]
107-
// CHECK: {{call|invoke}} void [[OP2:@.+]](i32* null, i32* null, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
107+
// CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
108108
// CHECK: br label {{%?}}[[DONE:.+]]
109109
//
110110
// CHECK: [[DONE]]

‎clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// Test target codegen - host bc file has to be created first.
2-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
2+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
77
// expected-no-diagnostics
88
#ifndef HEADER
99
#define HEADER
@@ -51,7 +51,7 @@ int bar(int n){
5151
//
5252
// CHECK: [[EXEC]]
5353
// CHECK-NOT: call void @__kmpc_push_num_threads
54-
// CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null, i16* [[AA]])
54+
// CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]])
5555
// CHECK: br label {{%?}}[[DONE:.+]]
5656
//
5757
// CHECK: [[DONE]]
@@ -94,7 +94,7 @@ int bar(int n){
9494
//
9595
// CHECK: [[EXEC]]
9696
// CHECK-NOT: call void @__kmpc_push_num_threads
97-
// CHECK: {{call|invoke}} void [[OP2:@.+]](i32* null, i32* null, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
97+
// CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
9898
// CHECK: br label {{%?}}[[DONE:.+]]
9999
//
100100
// CHECK: [[DONE]]

‎clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// Test target codegen - host bc file has to be created first.
2-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6-
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
2+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
77
// expected-no-diagnostics
88
#ifndef HEADER
99
#define HEADER
@@ -52,7 +52,7 @@ int bar(int n){
5252
//
5353
// CHECK: [[EXEC]]
5454
// CHECK-NOT: call void @__kmpc_push_proc_bind
55-
// CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
55+
// CHECK: {{call|invoke}} void [[OP1:@.+]](
5656
// CHECK: br label {{%?}}[[DONE:.+]]
5757
//
5858
// CHECK: [[DONE]]
@@ -73,7 +73,7 @@ int bar(int n){
7373
//
7474
// CHECK: [[EXEC]]
7575
// CHECK-NOT: call void @__kmpc_push_proc_bind
76-
// CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
76+
// CHECK: {{call|invoke}} void [[OP1:@.+]](
7777
// CHECK: br label {{%?}}[[DONE:.+]]
7878
//
7979
// CHECK: [[DONE]]
@@ -93,7 +93,7 @@ int bar(int n){
9393
//
9494
// CHECK: [[EXEC]]
9595
// CHECK-NOT: call void @__kmpc_push_proc_bind
96-
// CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
96+
// CHECK: {{call|invoke}} void [[OP1:@.+]](
9797
// CHECK: br label {{%?}}[[DONE:.+]]
9898
//
9999
// CHECK: [[DONE]]

‎clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// Test target codegen - host bc file has to be created first.
2-
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3-
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4-
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5-
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6-
// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
2+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
77
// expected-no-diagnostics
88
#ifndef HEADER
99
#define HEADER
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Test target codegen - host bc file has to be created first.
2+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
7+
// expected-no-diagnostics
8+
#ifndef HEADER
9+
#define HEADER
10+
11+
// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
12+
// CHECK-DAG: {{@__omp_offloading_.+l24}}_exec_mode = weak constant i8 0
13+
// CHECK-DAG: {{@__omp_offloading_.+l29}}_exec_mode = weak constant i8 0
14+
// CHECK-DAG: {{@__omp_offloading_.+l34}}_exec_mode = weak constant i8 0
15+
16+
#define N 1000
17+
18+
template<typename tx>
19+
tx ftemplate(int n) {
20+
tx a[N];
21+
short aa[N];
22+
tx b[10];
23+
24+
#pragma omp target simd
25+
for(int i = 0; i < n; i++) {
26+
a[i] = 1;
27+
}
28+
29+
#pragma omp target simd
30+
for(int i = 0; i < n; i++) {
31+
aa[i] += 1;
32+
}
33+
34+
#pragma omp target simd
35+
for(int i = 0; i < 10; i++) {
36+
b[i] += 1;
37+
}
38+
39+
return a[0];
40+
}
41+
42+
int bar(int n){
43+
int a = 0;
44+
45+
a += ftemplate<int>(n);
46+
47+
return a;
48+
}
49+
50+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l24}}(
51+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
52+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
53+
// CHECK-NOT: call void @__kmpc_for_static_init
54+
// CHECK-NOT: call void @__kmpc_for_static_fini
55+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
56+
// CHECK: ret void
57+
58+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l29}}(
59+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
60+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
61+
// CHECK-NOT: call void @__kmpc_for_static_init
62+
// CHECK-NOT: call void @__kmpc_for_static_fini
63+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
64+
// CHECK: ret void
65+
66+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l34}}(
67+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
68+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
69+
// CHECK-NOT: call void @__kmpc_for_static_init
70+
// CHECK-NOT: call void @__kmpc_for_static_fini
71+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
72+
// CHECK: ret void
73+
74+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// Test target codegen - host bc file has to be created first.
2+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
7+
// expected-no-diagnostics
8+
#ifndef HEADER
9+
#define HEADER
10+
11+
// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
12+
// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
13+
// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
14+
// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
15+
// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
16+
17+
#define N 1000
18+
#define M 10
19+
20+
template<typename tx>
21+
tx ftemplate(int n) {
22+
tx a[N];
23+
short aa[N];
24+
tx b[10];
25+
tx c[M][M];
26+
tx f = n;
27+
tx l;
28+
int k;
29+
30+
#pragma omp target teams distribute parallel for lastprivate(l) dist_schedule(static,128) schedule(static,32)
31+
for(int i = 0; i < n; i++) {
32+
a[i] = 1;
33+
l = i;
34+
}
35+
36+
#pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64)
37+
for(int i = 0; i < n; i++) {
38+
aa[i] += 1;
39+
}
40+
41+
#pragma omp target teams distribute parallel for map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread)
42+
for(int i = 0; i < 10; i++) {
43+
b[i] += 1;
44+
}
45+
46+
#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k) num_threads(M)
47+
for(int i = 0; i < M; i++) {
48+
for(int j = 0; j < M; j++) {
49+
k = M;
50+
c[i][j] = i+j*f+k;
51+
}
52+
}
53+
54+
return a[0];
55+
}
56+
57+
int bar(int n){
58+
int a = 0;
59+
60+
a += ftemplate<int>(n);
61+
62+
return a;
63+
}
64+
65+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
66+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
67+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
68+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
69+
// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
70+
// CHECK: call void @__kmpc_for_static_fini(
71+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
72+
// CHECK: ret void
73+
74+
// CHECK: define internal void [[OUTL1]](
75+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33,
76+
// CHECK: call void @__kmpc_for_static_fini(
77+
// CHECK: ret void
78+
79+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
80+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
81+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
82+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
83+
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
84+
// CHECK: call void @__kmpc_for_static_fini(
85+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
86+
// CHECK: ret void
87+
88+
// CHECK: define internal void [[OUTL2]](
89+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
90+
// CHECK: call void @__kmpc_for_static_fini(
91+
// CHECK: ret void
92+
93+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
94+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
95+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
96+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
97+
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
98+
// CHECK: call void @__kmpc_for_static_fini(
99+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
100+
// CHECK: ret void
101+
102+
// CHECK: define internal void [[OUTL3]](
103+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
104+
// CHECK: call void @__kmpc_for_static_fini(
105+
// CHECK: ret void
106+
107+
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
108+
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
109+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
110+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
111+
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
112+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
113+
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
114+
// CHECK: call void @__kmpc_for_static_fini(
115+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
116+
// CHECK: ret void
117+
118+
// CHECK: define internal void [[OUTL4]](
119+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
120+
// CHECK: call void @__kmpc_for_static_fini(
121+
// CHECK: ret void
122+
123+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// Test target codegen - host bc file has to be created first.
2+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
7+
// expected-no-diagnostics
8+
#ifndef HEADER
9+
#define HEADER
10+
11+
// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
12+
// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
13+
// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
14+
// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
15+
// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
16+
17+
#define N 1000
18+
#define M 10
19+
20+
template<typename tx>
21+
tx ftemplate(int n) {
22+
tx a[N];
23+
short aa[N];
24+
tx b[10];
25+
tx c[M][M];
26+
tx f = n;
27+
tx l;
28+
int k;
29+
30+
#pragma omp target teams distribute parallel for simd lastprivate(l) dist_schedule(static,128) schedule(static,32)
31+
for(int i = 0; i < n; i++) {
32+
a[i] = 1;
33+
l = i;
34+
}
35+
36+
#pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
37+
for(int i = 0; i < n; i++) {
38+
aa[i] += 1;
39+
}
40+
41+
#pragma omp target teams distribute parallel for simd map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread)
42+
for(int i = 0; i < 10; i++) {
43+
b[i] += 1;
44+
}
45+
46+
#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k) num_threads(M)
47+
for(int i = 0; i < M; i++) {
48+
for(int j = 0; j < M; j++) {
49+
k = M;
50+
c[i][j] = i+j*f+k;
51+
}
52+
}
53+
54+
return a[0];
55+
}
56+
57+
int bar(int n){
58+
int a = 0;
59+
60+
a += ftemplate<int>(n);
61+
62+
return a;
63+
}
64+
65+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
66+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
67+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
68+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
69+
// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
70+
// CHECK: call void @__kmpc_for_static_fini(
71+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
72+
// CHECK: ret void
73+
74+
// CHECK: define internal void [[OUTL1]](
75+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33,
76+
// CHECK: call void @__kmpc_for_static_fini(
77+
// CHECK: ret void
78+
79+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
80+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
81+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
82+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
83+
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
84+
// CHECK: call void @__kmpc_for_static_fini(
85+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
86+
// CHECK: ret void
87+
88+
// CHECK: define internal void [[OUTL2]](
89+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
90+
// CHECK: call void @__kmpc_for_static_fini(
91+
// CHECK: ret void
92+
93+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
94+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
95+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
96+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
97+
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
98+
// CHECK: call void @__kmpc_for_static_fini(
99+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
100+
// CHECK: ret void
101+
102+
// CHECK: define internal void [[OUTL3]](
103+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
104+
// CHECK: call void @__kmpc_for_static_fini(
105+
// CHECK: ret void
106+
107+
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
108+
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
109+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
110+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
111+
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
112+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
113+
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
114+
// CHECK: call void @__kmpc_for_static_fini(
115+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
116+
// CHECK: ret void
117+
118+
// CHECK: define internal void [[OUTL4]](
119+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
120+
// CHECK: call void @__kmpc_for_static_fini(
121+
// CHECK: ret void
122+
123+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Test target codegen - host bc file has to be created first.
2+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
7+
// expected-no-diagnostics
8+
#ifndef HEADER
9+
#define HEADER
10+
11+
// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
12+
// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
13+
// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
14+
// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
15+
// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
16+
17+
#define N 1000
18+
#define M 10
19+
20+
template<typename tx>
21+
tx ftemplate(int n) {
22+
tx a[N];
23+
short aa[N];
24+
tx b[10];
25+
tx c[M][M];
26+
tx f = n;
27+
tx l;
28+
int k;
29+
30+
#pragma omp target teams distribute simd lastprivate(l) dist_schedule(static,128)
31+
for(int i = 0; i < n; i++) {
32+
a[i] = 1;
33+
l = i;
34+
}
35+
36+
#pragma omp target teams distribute simd map(tofrom: aa) num_teams(M) thread_limit(64)
37+
for(int i = 0; i < n; i++) {
38+
aa[i] += 1;
39+
}
40+
41+
#pragma omp target teams distribute simd map(tofrom:a, aa, b) if(target: n>40)
42+
for(int i = 0; i < 10; i++) {
43+
b[i] += 1;
44+
}
45+
46+
#pragma omp target teams distribute simd collapse(2) firstprivate(f) private(k)
47+
for(int i = 0; i < M; i++) {
48+
for(int j = 0; j < M; j++) {
49+
k = M;
50+
c[i][j] = i+j*f+k;
51+
}
52+
}
53+
54+
return a[0];
55+
}
56+
57+
int bar(int n){
58+
int a = 0;
59+
60+
a += ftemplate<int>(n);
61+
62+
return a;
63+
}
64+
65+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
66+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
67+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
68+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
69+
// CHECK: call void @__kmpc_for_static_fini(
70+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
71+
// CHECK: ret void
72+
73+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
74+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
75+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
76+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
77+
// CHECK: call void @__kmpc_for_static_fini(
78+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
79+
// CHECK: ret void
80+
81+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
82+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
83+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
84+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
85+
// CHECK: call void @__kmpc_for_static_fini(
86+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
87+
// CHECK: ret void
88+
89+
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
90+
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
91+
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
92+
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
93+
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
94+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
95+
// CHECK: call void @__kmpc_for_static_fini(
96+
// CHECK: call void @__kmpc_spmd_kernel_deinit()
97+
// CHECK: ret void
98+
99+
#endif

‎clang/test/OpenMP/target_parallel_debug_codegen.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
2-
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
1+
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
2+
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
33
// expected-no-diagnostics
44

55
int main() {

‎clang/test/OpenMP/target_parallel_for_debug_codegen.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
2-
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
1+
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
2+
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
33
// expected-no-diagnostics
44

55
int main() {

0 commit comments

Comments
 (0)
Please sign in to comment.