diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2680,26 +2680,39 @@ Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; -def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>, + +//===----------------------------------------------------------------------===// +// Shared cc1 + fc1 OpenMP Target Options +//===----------------------------------------------------------------------===// + +let Flags = [CC1Option, FC1Option, NoArgumentUnused] in { +let Group = f_Group in { + +def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, HelpText<"Enable debugging in the OpenMP offloading device RTL">; -def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group<f_Group>, Flags<[NoArgumentUnused]>; -def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; -def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">, - Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; -def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">, - Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; -def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">, - Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; -def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">, - Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; -def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, Group<f_Group>, - Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, +def 
fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">; + +} // let Group = f_Group +} // let Flags = [CC1Option, FC1Option, NoArgumentUnused] + +let Flags = [CC1Option, FC1Option, NoArgumentUnused, HelpHidden] in { +let Group = f_Group in { + +def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">; +def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">; +def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">; +def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">; +def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">; +def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, HelpText<"Assert no thread in a parallel region modifies an ICV">, MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>; -def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, Group<f_Group>, - Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, +def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, HelpText<"Assert no nested parallel regions in the GPU">, MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>; + +} // let Group = f_Group +} // let Flags = [CC1Option, FC1Option, NoArgumentUnused, HelpHidden] + def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>, HelpText<"Do not create a host fallback if offloading to the device fails.">, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -157,6 +157,27 @@ // generating code for a device, so that only the relevant code is // emitted. CmdArgs.push_back("-fopenmp-is-device"); + + // When in OpenMP offloading mode, enable debugging on the device. 
+ Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_target_debug_EQ); + if (Args.hasFlag(options::OPT_fopenmp_target_debug, + options::OPT_fno_openmp_target_debug, /*Default=*/false)) + CmdArgs.push_back("-fopenmp-target-debug"); + + // When in OpenMP offloading mode, forward assumptions information about + // thread and team counts in the device. + if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription, + options::OPT_fno_openmp_assume_teams_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-teams-oversubscription"); + if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription, + options::OPT_fno_openmp_assume_threads_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); + if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state)) + CmdArgs.push_back("-fopenmp-assume-no-thread-state"); + if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism)) + CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism"); } } diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -36,6 +36,18 @@ LANGOPT(ReciprocalMath, 1, false) /// Generate code only for OpenMP target device LANGOPT(OpenMPIsDevice, 1, false) +/// Enable debugging in the OpenMP offloading device RTL +LANGOPT(OpenMPTargetDebug, 32, 0) +/// Assume work-shared loops do not have more iterations than participating +/// threads. +LANGOPT(OpenMPThreadSubscription, 1, 0) +/// Assume distributed loops do not have more iterations than participating +/// teams. +LANGOPT(OpenMPTeamSubscription, 1, 0) +/// Assume that no thread in a parallel region will modify an ICV. 
+LANGOPT(OpenMPNoThreadState, 1, 0) +/// Assume that no thread in a parallel region will encounter a parallel region +LANGOPT(OpenMPNoNestedParallelism, 1, 0) #undef LANGOPT #undef ENUM_LANGOPT diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -13,17 +13,55 @@ #ifndef FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H #define FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H +#include "flang/Frontend/LangOptions.h" +#include <cstdint> + #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/BuiltinOps.h" +/// Bundles the OpenMP offload RTL flags (debug kind and the assume-* hints) +/// that are attached to a module through the OpenMP OffloadModuleInterface. +struct OffloadModuleOpts { + OffloadModuleOpts() = default; + OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription, + bool OpenMPThreadSubscription, bool OpenMPNoThreadState, + bool OpenMPNoNestedParallelism, bool OpenMPIsDevice) + : OpenMPTargetDebug(OpenMPTargetDebug), + OpenMPTeamSubscription(OpenMPTeamSubscription), + OpenMPThreadSubscription(OpenMPThreadSubscription), + OpenMPNoThreadState(OpenMPNoThreadState), + OpenMPNoNestedParallelism(OpenMPNoNestedParallelism), + OpenMPIsDevice(OpenMPIsDevice) {} + + OffloadModuleOpts(const Fortran::frontend::LangOptions &Opts) + : OpenMPTargetDebug(Opts.OpenMPTargetDebug), + OpenMPTeamSubscription(Opts.OpenMPTeamSubscription), + OpenMPThreadSubscription(Opts.OpenMPThreadSubscription), + OpenMPNoThreadState(Opts.OpenMPNoThreadState), + OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism), + OpenMPIsDevice(Opts.OpenMPIsDevice) {} + + uint32_t OpenMPTargetDebug = 0; + bool OpenMPTeamSubscription = false; + bool OpenMPThreadSubscription = false; + bool OpenMPNoThreadState = false; + bool OpenMPNoNestedParallelism = false; + bool OpenMPIsDevice = false; +}; + // Shares assinging of the OpenMP OffloadModuleInterface and its assorted // attributes accross Flang tools (bbc/flang) void setOffloadModuleInterfaceAttributes( - mlir::ModuleOp &module, bool isDevice) { + mlir::ModuleOp &module, 
+ OffloadModuleOpts Opts) { // Should be registered by the OpenMPDialect if (auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>( module.getOperation())) { - offloadMod.setIsDevice(isDevice); + offloadMod.setIsDevice(Opts.OpenMPIsDevice); + if (Opts.OpenMPIsDevice) { + offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription, + Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState, + Opts.OpenMPNoNestedParallelism); + } } } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -721,6 +721,39 @@ if (args.hasArg(clang::driver::options::OPT_fopenmp_is_device)) { res.getLangOpts().OpenMPIsDevice = 1; + + if (args.hasFlag( + clang::driver::options::OPT_fopenmp_assume_teams_oversubscription, + clang::driver::options:: + OPT_fno_openmp_assume_teams_oversubscription, + /*Default=*/false)) + res.getLangOpts().OpenMPTeamSubscription = true; + + if (args.hasArg( + clang::driver::options::OPT_fopenmp_assume_no_thread_state)) + res.getLangOpts().OpenMPNoThreadState = 1; + + if (args.hasArg( + clang::driver::options::OPT_fopenmp_assume_no_nested_parallelism)) + res.getLangOpts().OpenMPNoNestedParallelism = 1; + + if (args.hasFlag(clang::driver::options:: + OPT_fopenmp_assume_threads_oversubscription, + clang::driver::options:: + OPT_fno_openmp_assume_threads_oversubscription, + /*Default=*/false)) + res.getLangOpts().OpenMPThreadSubscription = true; + + if ((args.hasArg(clang::driver::options::OPT_fopenmp_target_debug) || + args.hasArg(clang::driver::options::OPT_fopenmp_target_debug_EQ))) { + res.getLangOpts().OpenMPTargetDebug = getLastArgIntValue( + args, clang::driver::options::OPT_fopenmp_target_debug_EQ, + res.getLangOpts().OpenMPTargetDebug, diags); + + if (!res.getLangOpts().OpenMPTargetDebug && + args.hasArg(clang::driver::options::OPT_fopenmp_target_debug)) + res.getLangOpts().OpenMPTargetDebug = 1; + } } } diff --git 
a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -278,8 +278,8 @@ if (ci.getInvocation().getFrontendOpts().features.IsEnabled( Fortran::common::LanguageFeature::OpenMP)) { - setOffloadModuleInterfaceAttributes( - *mlirModule, ci.getInvocation().getLangOpts().OpenMPIsDevice); + setOffloadModuleInterfaceAttributes(*mlirModule, + ci.getInvocation().getLangOpts()); } if (!setUpTargetMachine()) diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -145,6 +145,7 @@ ! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) ! HELP-FC1-NEXT: -fopenacc Enable OpenACC ! HELP-FC1-NEXT: -fopenmp-is-device Generate code only for an OpenMP target device. +! HELP-FC1-NEXT: -fopenmp-target-debug Enable debugging in the OpenMP offloading device RTL ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-FC1-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). ! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated diff --git a/flang/test/Driver/omp-frontend-forwarding.f90 b/flang/test/Driver/omp-frontend-forwarding.f90 --- a/flang/test/Driver/omp-frontend-forwarding.f90 +++ b/flang/test/Driver/omp-frontend-forwarding.f90 @@ -20,3 +20,50 @@ ! CHECK-OPENMP-EMBED-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" {{.*}}.f90" ! CHECK-OPENMP-EMBED: "{{[^"]*}}clang-offload-packager{{.*}} "--image=file={{.*}}.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp" ! CHECK-OPENMP-EMBED-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu" {{.*}} "-fopenmp" {{.*}} "-fembed-offload-object={{.*}}.out" {{.*}}.bc" + +! 
Test -fopenmp with offload for RTL Flag Options +! RUN: %flang -### %s -o %t 2>&1 \ +! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +! RUN: -fopenmp-assume-threads-oversubscription \ +! RUN: | FileCheck %s --check-prefixes=CHECK-THREADS-OVS +! CHECK-THREADS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-threads-oversubscription" {{.*}}.f90" + +! RUN: %flang -### %s -o %t 2>&1 \ +! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +! RUN: -fopenmp-assume-teams-oversubscription \ +! RUN: | FileCheck %s --check-prefixes=CHECK-TEAMS-OVS +! CHECK-TEAMS-OVS: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-teams-oversubscription" {{.*}}.f90" + +! RUN: %flang -### %s -o %t 2>&1 \ +! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +! RUN: -fopenmp-assume-no-nested-parallelism \ +! RUN: | FileCheck %s --check-prefixes=CHECK-NEST-PAR +! CHECK-NEST-PAR: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-no-nested-parallelism" {{.*}}.f90" + +! RUN: %flang -### %s -o %t 2>&1 \ +! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +! RUN: -fopenmp-assume-no-thread-state \ +! RUN: | FileCheck %s --check-prefixes=CHECK-THREAD-STATE +! CHECK-THREAD-STATE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-assume-no-thread-state" {{.*}}.f90" + +! RUN: %flang -### %s -o %t 2>&1 \ +! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +! RUN: -fopenmp-target-debug \ +! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG +! CHECK-TARGET-DEBUG: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-target-debug" {{.*}}.f90" + +! RUN: %flang -### %s -o %t 2>&1 \ +! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +! RUN: -fopenmp-target-debug=111 \ +! RUN: | FileCheck %s --check-prefixes=CHECK-TARGET-DEBUG-EQ +! 
CHECK-TARGET-DEBUG-EQ: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-target-debug=111" {{.*}}.f90" + +! RUN: %flang -S -### %s -o %t 2>&1 \ +! RUN: -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +! RUN: -fopenmp-target-debug -fopenmp-assume-threads-oversubscription \ +! RUN: -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism \ +! RUN: -fopenmp-assume-no-thread-state \ +! RUN: | FileCheck %s --check-prefixes=CHECK-RTL-ALL +! CHECK-RTL-ALL: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" {{.*}} "-fopenmp-is-device" "-fopenmp-target-debug" "-fopenmp-assume-teams-oversubscription" +! CHECK-RTL-ALL: "-fopenmp-assume-threads-oversubscription" "-fopenmp-assume-no-thread-state" "-fopenmp-assume-no-nested-parallelism" +! CHECK-RTL-ALL: {{.*}}.f90" diff --git a/flang/test/Lower/OpenMP/rtl-flags.f90 b/flang/test/Lower/OpenMP/rtl-flags.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/rtl-flags.f90 @@ -0,0 +1,84 @@ +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DEFAULT-DEVICE-FIR +!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefix=DEFAULT-HOST-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DEFAULT-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-target-debug -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-target-debug -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-target-debug=111 -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-EQ-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-target-debug=111 -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DBG-EQ-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-teams-oversubscription -fopenmp-is-device %s -o - | FileCheck %s 
--check-prefix=TEAMS-OSUB-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-teams-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=TEAMS-OSUB-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-threads-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-OSUB-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-threads-oversubscription -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-OSUB-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-STATE-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=THREAD-STATE-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=NEST-PAR-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=NEST-PAR-DEVICE-LLVM +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-target-debug -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=ALL-DEVICE-FIR +!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-target-debug -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=ALL-DEVICE-LLVM +!RUN: bbc -emit-fir -fopenmp -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=DEFAULT-DEVICE-FIR +!RUN: bbc -emit-fir -fopenmp -o - %s | FileCheck %s --check-prefix=DEFAULT-HOST-FIR +!RUN: bbc -emit-fir -fopenmp -fopenmp-target-debug=111 -fopenmp-is-device -o - %s | FileCheck %s 
--check-prefix=DBG-EQ-DEVICE-FIR +!RUN: bbc -emit-fir -fopenmp -fopenmp-assume-teams-oversubscription -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=TEAMS-OSUB-DEVICE-FIR +!RUN: bbc -emit-fir -fopenmp -fopenmp-assume-threads-oversubscription -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=THREAD-OSUB-DEVICE-FIR +!RUN: bbc -emit-fir -fopenmp -fopenmp-assume-no-thread-state -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=THREAD-STATE-DEVICE-FIR +!RUN: bbc -emit-fir -fopenmp -fopenmp-assume-no-nested-parallelism -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=NEST-PAR-DEVICE-FIR +!RUN: bbc -emit-fir -fopenmp -fopenmp-target-debug=1 -fopenmp-assume-teams-oversubscription -fopenmp-assume-no-nested-parallelism -fopenmp-assume-threads-oversubscription -fopenmp-assume-no-thread-state -fopenmp-is-device -o - %s | FileCheck %s --check-prefix=ALL-DEVICE-FIR + +!DEFAULT-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags<>, omp.is_device = #omp.isdevice{{.*}}} +!DEFAULT-HOST-FIR: module attributes {{{.*}}, omp.is_device = #omp.isdevice{{.*}}} +!DEFAULT-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!DEFAULT-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!DBG-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} +!DBG-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1 +!DBG-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!DBG-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!DBG-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!DBG-DEVICE-LLVM: 
@__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!DBG-EQ-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 111 +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!DBG-EQ-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!TEAMS-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1 +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!TEAMS-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!THREAD-OSUB-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1 +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!THREAD-OSUB-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!THREAD-STATE-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant 
i32 0 +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1 +!THREAD-STATE-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 + +!NEST-PAR-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +!NEST-PAR-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1 + +!ALL-DEVICE-FIR: module attributes {{{.*}}, omp.flags = #omp.flags{{.*}}} +!ALL-DEVICE-LLVM: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1 +!ALL-DEVICE-LLVM: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1 +!ALL-DEVICE-LLVM: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1 +!ALL-DEVICE-LLVM: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1 +!ALL-DEVICE-LLVM: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1 +subroutine omp_subroutine() +end subroutine omp_subroutine diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -129,6 +129,38 @@ llvm::cl::desc("enable openmp device compilation"), llvm::cl::init(false)); +// A simplified subset of the OpenMP RTL Flags from Flang, only the primary +// positive options are available, no negative options e.g. 
fopenmp_assume* vs +// fno_openmp_assume* +static llvm::cl::opt<uint32_t> setOpenMPTargetDebug( + "fopenmp-target-debug", + llvm::cl::desc("Enable debugging in the OpenMP offloading device RTL"), + llvm::cl::init(0)); + +static llvm::cl::opt<bool> setOpenMPThreadSubscription( + "fopenmp-assume-threads-oversubscription", + llvm::cl::desc("Assume work-shared loops do not have more " + "iterations than participating threads."), + llvm::cl::init(false)); + +static llvm::cl::opt<bool> setOpenMPTeamSubscription( + "fopenmp-assume-teams-oversubscription", + llvm::cl::desc("Assume distributed loops do not have more iterations than " + "participating teams."), + llvm::cl::init(false)); + +static llvm::cl::opt<bool> setOpenMPNoThreadState( + "fopenmp-assume-no-thread-state", + llvm::cl::desc( + "Assume that no thread in a parallel region will modify an ICV."), + llvm::cl::init(false)); + +static llvm::cl::opt<bool> setOpenMPNoNestedParallelism( + "fopenmp-assume-no-nested-parallelism", + llvm::cl::desc("Assume that no thread in a parallel region will encounter " + "a parallel region."), + llvm::cl::init(false)); + static llvm::cl::opt<bool> enableOpenACC("fopenacc", llvm::cl::desc("enable openacc"), llvm::cl::init(false)); @@ -244,8 +276,13 @@ kindMap, loweringOptions, {}); burnside.lower(parseTree, semanticsContext); mlir::ModuleOp mlirModule = burnside.getModule(); - if (enableOpenMP) - setOffloadModuleInterfaceAttributes(mlirModule, enableOpenMPDevice); + if (enableOpenMP) { + auto offloadModuleOpts = + OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription, + setOpenMPThreadSubscription, setOpenMPNoThreadState, + setOpenMPNoNestedParallelism, enableOpenMPDevice); + setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts); + } std::error_code ec; std::string outputName = outputFilename; if (!outputName.size()) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1542,6 +1542,39 @@ return bodyGenStatus; } +/// Lowers the FlagsAttr which is applied to the module on the device +/// pass when offloading, this attribute contains OpenMP RTL globals that can +/// be passed as flags to the frontend, otherwise they are set to default +static LogicalResult +convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + ompBuilder->createGlobalFlag( + attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/, + "__omp_rtl_debug_kind"); + ompBuilder->createGlobalFlag( + attribute + .getAssumeTeamsOversubscription() /*LangOpts().OpenMPTeamSubscription*/ + , + "__omp_rtl_assume_teams_oversubscription"); + ompBuilder->createGlobalFlag( + attribute + .getAssumeThreadsOversubscription() /*LangOpts().OpenMPThreadSubscription*/ + , + "__omp_rtl_assume_threads_oversubscription"); + ompBuilder->createGlobalFlag( + attribute.getAssumeNoThreadState() /*LangOpts().OpenMPNoThreadState*/, + "__omp_rtl_assume_no_thread_state"); + ompBuilder->createGlobalFlag( + attribute + .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/ + , + "__omp_rtl_assume_no_nested_parallelism"); + + return success(); +} + namespace { /// Implementation of the dialect interface that converts operations belonging @@ -1556,10 +1589,32 @@ LogicalResult convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const final; + + LogicalResult + amendOperation(Operation *op, NamedAttribute attribute, + LLVM::ModuleTranslation &moduleTranslation) const final; }; } // namespace +/// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, runtime +/// calls, or operation amendments +LogicalResult 
OpenMPDialectLLVMIRTranslationInterface::amendOperation( + Operation *op, NamedAttribute attribute, + LLVM::ModuleTranslation &moduleTranslation) const { + + return llvm::TypeSwitch<Attribute, LogicalResult>(attribute.getValue()) + .Case([&](mlir::omp::FlagsAttr rtlAttr) { + return convertFlagsAttr(op, rtlAttr, moduleTranslation); + }) + .Default([&](Attribute attr) { + // fall through for omp attributes that do not require lowering and/or + // have no concrete definition and thus no type to define a case on + // e.g. omp.is_device + return success(); + }); +} + /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(