diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -244,6 +244,8 @@ LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading") LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL") LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.") +LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.") +LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.") LANGOPT(RenderScript , 1, 0, "RenderScript") LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2427,6 +2427,14 @@ HelpText<"Enable debugging in the OpenMP offloading device RTL">; def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group, Flags<[NoArgumentUnused]>; def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">, + Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">, + Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">, + Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">, + Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime", LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse, PosFlag, diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1200,8 +1200,14 @@ llvm_unreachable("OpenMP NVPTX can only handle device code."); llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder(); - if (CGM.getLangOpts().OpenMPTargetNewRuntime) - OMPBuilder.createDebugKind(CGM.getLangOpts().OpenMPTargetDebug); + if (CGM.getLangOpts().OpenMPTargetNewRuntime) { + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug, + "__omp_rtl_debug_kind"); + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription, + "__omp_rtl_assume_teams_oversubscription"); + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription, + "__omp_rtl_assume_threads_oversubscription"); + } } void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5815,6 +5815,17 @@ options::OPT_fno_openmp_cuda_force_full_runtime, /*Default=*/false)) CmdArgs.push_back("-fopenmp-cuda-force-full-runtime"); + + // When in OpenMP offloading mode, forward assumptions information about + // thread and team counts in the device. + if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription, + options::OPT_fno_openmp_assume_teams_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-teams-oversubscription"); + if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription, + options::OPT_fno_openmp_assume_threads_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3486,6 +3486,12 @@ if (Opts.OpenMPTargetNewRuntime) GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA); + if (Opts.OpenMPThreadSubscription) + GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA); + + if (Opts.OpenMPTeamSubscription) + GenerateArg(Args, OPT_fopenmp_assume_teams_oversubscription, SA); + if (Opts.OpenMPTargetDebug != 0) GenerateArg(Args, OPT_fopenmp_target_debug_EQ, Twine(Opts.OpenMPTargetDebug), SA); @@ -3928,6 +3934,13 @@ } } + if (Opts.OpenMPIsDevice && Opts.OpenMPTargetNewRuntime) { + if (Args.hasArg(OPT_fopenmp_assume_teams_oversubscription)) + Opts.OpenMPTeamSubscription = true; + if (Args.hasArg(OPT_fopenmp_assume_threads_oversubscription)) + Opts.OpenMPThreadSubscription = true; + } + // Get the OpenMP target triples if any. if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) { enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit }; diff --git a/clang/test/OpenMP/target_debug_codegen.cpp b/clang/test/OpenMP/target_debug_codegen.cpp deleted file mode 100644 --- a/clang/test/OpenMP/target_debug_codegen.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_debug_kind" -// Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -//. -// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1 -//. -// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111 -//. -// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0 -//. -void foo() { -#pragma omp target - { } -} - -#endif diff --git a/clang/test/OpenMP/target_globals_codegen.cpp b/clang/test/OpenMP/target_globals_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_globals_codegen.cpp @@ -0,0 +1,40 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_" +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-threads-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-THREADS +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +//. +// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1 +// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111 +// CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0 +// CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +// CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr constant i32 0 +// CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 1 +//. +// CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr constant i32 0 +// CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 1 +// CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +void foo() { +#pragma omp target + { } +} + +#endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -683,9 +683,8 @@ omp::IdentFlag Flags = omp::IdentFlag(0), unsigned Reserve2Flags = 0); - /// Create a global value containing the \p DebugLevel to control debuggin in - /// the module. - GlobalValue *createDebugKind(unsigned DebugLevel); + /// Create a global flag \p Namein the module with initial value \p Value. + GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); /// Generate control flow and cleanup for cancellation. /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -245,12 +245,12 @@ assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } -GlobalValue *OpenMPIRBuilder::createDebugKind(unsigned DebugKind) { +GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) { IntegerType *I32Ty = Type::getInt32Ty(M.getContext()); - auto *GV = new GlobalVariable( - M, I32Ty, - /* isConstant = */ true, GlobalValue::WeakODRLinkage, - ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind"); + auto *GV = + new GlobalVariable(M, I32Ty, + /* isConstant = */ true, GlobalValue::WeakODRLinkage, + ConstantInt::get(I32Ty, Value), Name); return GV; }