diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2676,24 +2676,24 @@ Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; -def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group, Flags<[CC1Option, NoArgumentUnused]>, +def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group, Flags<[CC1Option, FC1Option, NoArgumentUnused]>, HelpText<"Enable debugging in the OpenMP offloading device RTL">; -def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group, Flags<[NoArgumentUnused]>; -def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group, Flags<[NoArgumentUnused, FC1Option]>; +def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group, Flags<[CC1Option, FC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">, - Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; + Group, Flags<[CC1Option, FC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">, - Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; + Group, Flags<[CC1Option, FC1Option, NoArgumentUnused, HelpHidden]>; def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">, - Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; + Group, Flags<[CC1Option, FC1Option, NoArgumentUnused, HelpHidden]>; def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">, - 
Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; + Group, Flags<[CC1Option, FC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, Group, - Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, + Flags<[CC1Option, FC1Option, NoArgumentUnused, HelpHidden]>, HelpText<"Assert no thread in a parallel region modifies an ICV">, MarshallingInfoFlag>; def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, Group, - Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, + Flags<[CC1Option, FC1Option, NoArgumentUnused, HelpHidden]>, HelpText<"Assert no nested parallel regions in the GPU">, MarshallingInfoFlag>; def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group, diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h --- a/clang/lib/Driver/ToolChains/Flang.h +++ b/clang/lib/Driver/ToolChains/Flang.h @@ -56,6 +56,14 @@ void addTargetOptions(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// Extract offload options from the driver arguments and add them to CmdArgs. + /// + /// \param [in] JA The job action + /// \param [in] Args The list of input driver arguments + /// \param [out] CmdArgs The list of output command arguments + void addOffloadOptions(const JobAction &JA, const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + /// Extract other compilation options from the driver arguments and add them /// to the command arguments. /// diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -114,6 +114,34 @@ // TODO: Add target specific flags, ABI, mtune option etc. 
} +void Flang::addOffloadOptions(const JobAction &JA, const ArgList &Args, + ArgStringList &CmdArgs) const { + bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP); + + if (IsOpenMPDevice) { + // When in OpenMP offloading mode, forward the device RTL debug options. + Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_target_debug_EQ); + if (Args.hasFlag(options::OPT_fopenmp_target_debug, + options::OPT_fno_openmp_target_debug, /*Default=*/false)) + CmdArgs.push_back("-fopenmp-target-debug"); + + // When in OpenMP offloading mode, forward the assumption flags (team and + // thread oversubscription, no thread state, no nested parallelism). + if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription, + options::OPT_fno_openmp_assume_teams_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-teams-oversubscription"); + if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription, + options::OPT_fno_openmp_assume_threads_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); + if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state)) + CmdArgs.push_back("-fopenmp-assume-no-thread-state"); + if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism)) + CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism"); + } +} + static void addFloatingPointOptions(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs) { StringRef FPContract; @@ -311,6 +339,9 @@ // Add target args, features, etc. addTargetOptions(Args, CmdArgs); + // Add offloading related options + addOffloadOptions(JA, Args, CmdArgs); + // Add other compile options addOtherOptions(Args, CmdArgs); diff --git a/clang/test/Driver/flang/flang-omp.f90 b/clang/test/Driver/flang/flang-omp.f90 new file mode 100644 --- /dev/null +++ b/clang/test/Driver/flang/flang-omp.f90 @@ -0,0 +1,29 @@ +! Check that flang -fc1 is invoked when in --driver-mode=flang +! and the relevant openmp and openmp offload flags are utilised +! 
and passed down correctly + +! Test -fopenmp with offload for RTL Flag Options +! RUN: %clang --driver-mode=flang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-assume-threads-oversubscription %s 2>&1 | FileCheck --check-prefixes=CHECK-THREADS-OVS %s +! CHECK-THREADS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-threads-oversubscription" {{.*}}.f90" + +! RUN: %clang --driver-mode=flang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-assume-teams-oversubscription %s 2>&1 | FileCheck --check-prefixes=CHECK-TEAMS-OVS %s +! CHECK-TEAMS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-teams-oversubscription" {{.*}}.f90" + +! RUN: %clang --driver-mode=flang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-assume-no-nested-parallelism %s 2>&1 | FileCheck --check-prefixes=CHECK-NEST-PAR %s +! CHECK-NEST-PAR: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-no-nested-parallelism" {{.*}}.f90" + +! RUN: %clang --driver-mode=flang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-assume-no-thread-state %s 2>&1 | FileCheck --check-prefixes=CHECK-THREAD-STATE %s +! CHECK-THREAD-STATE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-no-thread-state" {{.*}}.f90" + +! RUN: %clang --driver-mode=flang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-target-debug %s 2>&1 | FileCheck --check-prefixes=CHECK-TARGET-DEBUG %s +! CHECK-TARGET-DEBUG: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-target-debug" {{.*}}.f90" + +! RUN: %clang --driver-mode=flang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-target-debug=111 %s 2>&1 | FileCheck --check-prefixes=CHECK-TARGET-DEBUG-EQ %s +! 
CHECK-TARGET-DEBUG-EQ: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-target-debug=111" {{.*}}.f90" + +! RUN: %clang --driver-mode=flang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-target-debug -fopenmp-assume-threads-oversubscription \ +! RUN: -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-no-thread-state \ +! RUN: %s 2>&1 | FileCheck --check-prefixes=CHECK-RTL-ALL %s +! CHECK-RTL-ALL: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-target-debug" "-fopenmp-assume-teams-oversubscription" +! CHECK-RTL-ALL: "-fopenmp-assume-threads-oversubscription" "-fopenmp-assume-no-thread-state" "-fopenmp-assume-no-nested-parallelism" +! CHECK-RTL-ALL: {{.*}}.f90" \ No newline at end of file diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -34,6 +34,18 @@ LANGOPT(AssociativeMath, 1, false) /// Allow division operations to be reassociated LANGOPT(ReciprocalMath, 1, false) +/// Enable debugging in the OpenMP offloading device RTL +LANGOPT(OpenMPTargetDebug, 32, 0) +/// Assume work-shared loops do not have more iterations than participating +/// threads. +LANGOPT(OpenMPThreadSubscription, 1, 0) +/// Assume distributed loops do not have more iterations than participating +/// teams. +LANGOPT(OpenMPTeamSubscription, 1, 0) +/// Assume that no thread in a parallel region will modify an ICV. 
+LANGOPT(OpenMPNoThreadState, 1, 0) +/// Assume that no thread in a parallel region will encounter a parallel region. +LANGOPT(OpenMPNoNestedParallelism, 1, 0) #undef LANGOPT #undef ENUM_LANGOPT diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -670,6 +670,41 @@ if (args.hasArg(clang::driver::options::OPT_fopenmp)) { res.getFrontendOpts().features.Enable( Fortran::common::LanguageFeature::OpenMP); + + if (args.hasArg(clang::driver::options::OPT_fopenmp_is_device)) { + if (args.hasFlag( + clang::driver::options::OPT_fopenmp_assume_teams_oversubscription, + clang::driver::options:: + OPT_fno_openmp_assume_teams_oversubscription, + /*Default=*/false)) + res.getLangOpts().OpenMPTeamSubscription = true; + + if (args.hasArg( + clang::driver::options::OPT_fopenmp_assume_no_thread_state)) + res.getLangOpts().OpenMPNoThreadState = 1; + + if (args.hasArg( + clang::driver::options::OPT_fopenmp_assume_no_nested_parallelism)) + res.getLangOpts().OpenMPNoNestedParallelism = 1; + + if (args.hasFlag(clang::driver::options:: + OPT_fopenmp_assume_threads_oversubscription, + clang::driver::options:: + OPT_fno_openmp_assume_threads_oversubscription, + /*Default=*/false)) + res.getLangOpts().OpenMPThreadSubscription = true; + + if ((args.hasArg(clang::driver::options::OPT_fopenmp_target_debug) || + args.hasArg(clang::driver::options::OPT_fopenmp_target_debug_EQ))) { + res.getLangOpts().OpenMPTargetDebug = getLastArgIntValue( + args, clang::driver::options::OPT_fopenmp_target_debug_EQ, + res.getLangOpts().OpenMPTargetDebug, diags); + + if (!res.getLangOpts().OpenMPTargetDebug && + args.hasArg(clang::driver::options::OPT_fopenmp_target_debug)) + res.getLangOpts().OpenMPTargetDebug = 1; + } + } } // -pedantic diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp --- a/flang/lib/Frontend/FrontendActions.cpp 
+++ b/flang/lib/Frontend/FrontendActions.cpp @@ -177,6 +177,18 @@ // Fetch module from lb, so we can set mlirModule = std::make_unique(lb.getModule()); + + if (ci.getInvocation().getFrontendOpts().features.IsEnabled( + Fortran::common::LanguageFeature::OpenMP)) { + if (ci.getInvocation().getLangOpts().OpenMPIsDevice) + mlir::omp::OpenMPDialect::setRTLFlags( + *mlirModule, ci.getInvocation().getLangOpts().OpenMPTargetDebug, + ci.getInvocation().getLangOpts().OpenMPTeamSubscription, + ci.getInvocation().getLangOpts().OpenMPThreadSubscription, + ci.getInvocation().getLangOpts().OpenMPNoThreadState, + ci.getInvocation().getLangOpts().OpenMPNoNestedParallelism); + } + setUpTargetMachine(); const llvm::DataLayout &dl = tm->createDataLayout(); setMLIRDataLayout(*mlirModule, dl); diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -138,6 +138,7 @@ ! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) ! HELP-FC1-NEXT: -fopenacc Enable OpenACC +! HELP-FC1-NEXT: -fopenmp-target-debug Enable debugging in the OpenMP offloading device RTL ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-FC1-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). ! 
HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated diff --git a/flang/test/Lower/OpenMP/rtl-flags.f90 b/flang/test/Lower/OpenMP/rtl-flags.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/rtl-flags.f90 @@ -0,0 +1,76 @@ +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DEFAULT-DEVICE-FIR +!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefix=DEFAULT-HOST-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DEFAULT-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-target-debug -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-target-debug -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-target-debug=111 -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-EQ-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-target-debug=111 -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-EQ-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-teams-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=TEAMS-OSUB-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-teams-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=TEAMS-OSUB-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-threads-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-OSUB-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-threads-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-OSUB-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-STATE-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-no-thread-state -fopenmp-is-device %s 
-o - | FileCheck %s --check-prefix=THREAD-STATE-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=NEST-PAR-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=NEST-PAR-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-target-debug -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=ALL-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-target-debug -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=ALL-DEVICE-LLVM + +!DEFAULT-DEVICE-FIR: module attributes {{{.*}}, omp.is_device = true, omp.rtlmoduleflags = #omp.rtlmoduleflags<>} +!DEFAULT-HOST-FIR: module attributes {{{.*}}, omp.is_device = false} +!DEFAULT-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!DBG-DEVICE-FIR: module attributes {{{.*}}, omp.is_device = true, omp.rtlmoduleflags = #omp.rtlmoduleflags} +!DBG-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1 +!DBG-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!DBG-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!DBG-DEVICE-LLVM: 
@__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!DBG-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}, omp.is_device = true, omp.rtlmoduleflags = #omp.rtlmoduleflags} +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 111 +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.is_device = true, omp.rtlmoduleflags = #omp.rtlmoduleflags} +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1 +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.is_device = true, omp.rtlmoduleflags = #omp.rtlmoduleflags} +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1 +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}, omp.is_device = true, 
omp.rtlmoduleflags = #omp.rtlmoduleflags} +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1 +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}, omp.is_device = true, omp.rtlmoduleflags = #omp.rtlmoduleflags} +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1 + +!ALL-DEVICE-FIR: module attributes {{{.*}}, omp.is_device = true, omp.rtlmoduleflags = #omp.rtlmoduleflags} +!ALL-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1 +!ALL-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1 +!ALL-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1 +!ALL-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1 +!ALL-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1 +subroutine omp_subroutine() +end subroutine omp_subroutine diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1351,6 +1351,40 @@ return success(); } +/// Lowers the RTLModuleFlagsAttr, which is applied to the module on the device +/// pass when offloading, into one OpenMP RTL global flag per attribute field. +/// The fields can be set via frontend flags; otherwise they hold defaults. +LogicalResult +convertRTLModuleFlagsAttr(Operation *op, + mlir::omp::RTLModuleFlagsAttr attribute, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + ompBuilder->createGlobalFlag( + attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/, + "__omp_rtl_debug_kind"); + ompBuilder->createGlobalFlag( + attribute + .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/ + , + "__omp_rtl_assume_teams_oversubscription"); + ompBuilder->createGlobalFlag( + attribute + .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/ + , + "__omp_rtl_assume_threads_oversubscription"); + ompBuilder->createGlobalFlag( + attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/, + "__omp_rtl_assume_no_thread_state"); + ompBuilder->createGlobalFlag( + attribute + .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/ + , + "__omp_rtl_assume_no_nested_parallelism"); + + return success(); +} + namespace { /// Implementation of the dialect interface that converts operations belonging @@ -1365,10 +1399,32 @@ LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const final; + + LogicalResult + amendOperation(Operation *op, NamedAttribute attribute, + LLVM::ModuleTranslation &moduleTranslation) const final; }; } // namespace +/// Given an OpenMP MLIR attribute, create the corresponding LLVM IR, runtime +/// calls, or operation amendments. Attributes with no lowering are accepted. 
+LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation( + Operation *op, NamedAttribute 
attribute, + LLVM::ModuleTranslation &moduleTranslation) const { + + return llvm::TypeSwitch<Attribute, LogicalResult>(attribute.getValue()) + .Case([&](mlir::omp::RTLModuleFlagsAttr rtlAttr) { + return convertRTLModuleFlagsAttr(op, rtlAttr, moduleTranslation); + }) + .Default([&](Attribute attr) { + // fall through for omp attributes that do not require lowering and/or + // have no concrete definition and thus no type to define a case on + // e.g. omp.is_device + return success(); + }); +} + /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(