diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -278,6 +278,7 @@ "unknown remark serializer format: '%0'">; def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">; def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">; +def err_drv_debug_no_new_runtime : Error<"OpenMP target device debugging enabled with incompatible runtime">; def err_drv_incompatible_omp_arch : Error<"OpenMP target architecture '%0' pointer size is incompatible with host '%1'">; def err_drv_omp_host_ir_file_not_found : Error< "provided host compiler IR file '%0' is required to generate code for OpenMP " diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -242,6 +242,7 @@ LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, "Number of blocks per SM for CUDA devices.") LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.") LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading") +LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL") LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.") LANGOPT(RenderScript , 1, 0, "RenderScript") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2410,6 +2410,10 @@ Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group, Flags<[CC1Option, NoArgumentUnused]>, + HelpText<"Enable debugging in the OpenMP offloading device RTL">; +def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group, Flags<[NoArgumentUnused]>; +def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime", LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse, PosFlag, diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1197,6 +1197,10 @@ : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) llvm_unreachable("OpenMP NVPTX can only handle device code."); + + llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder(); + if (CGM.getLangOpts().OpenMPTargetNewRuntime) + OMPBuilder.createDebugKind(CGM.getLangOpts().OpenMPTargetDebug); } void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5766,6 +5766,19 @@ options::OPT_fno_openmp_cuda_mode, /*Default=*/false)) CmdArgs.push_back("-fopenmp-cuda-mode"); + // When in OpenMP offloading mode, enable or disable the new device + // runtime. + if (Args.hasFlag(options::OPT_fopenmp_target_new_runtime, + options::OPT_fno_openmp_target_new_runtime, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-target-new-runtime"); + + // When in OpenMP offloading mode, enable debugging on the device. + Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_target_debug_EQ); + if (Args.hasFlag(options::OPT_fopenmp_target_debug, + options::OPT_fno_openmp_target_debug, /*Default=*/false)) + CmdArgs.push_back("-fopenmp-target-debug"); + // When in OpenMP offloading mode with NVPTX target, check if full runtime // is required. if (Args.hasFlag(options::OPT_fopenmp_cuda_force_full_runtime, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3460,6 +3460,13 @@ GenerateArg(Args, OPT_fopenmp_version_EQ, Twine(Opts.OpenMP), SA); } + if (Opts.OpenMPTargetNewRuntime) + GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA); + + if (Opts.OpenMPTargetDebug != 0) + GenerateArg(Args, OPT_fopenmp_target_debug_EQ, + Twine(Opts.OpenMPTargetDebug), SA); + if (Opts.OpenMPCUDANumSMs != 0) GenerateArg(Args, OPT_fopenmp_cuda_number_of_sm_EQ, Twine(Opts.OpenMPCUDANumSMs), SA); @@ -3838,6 +3845,8 @@ Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_enable_irbuilder); bool IsTargetSpecified = Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ); + Opts.OpenMPTargetNewRuntime = + Opts.OpenMPIsDevice && Args.hasArg(options::OPT_fopenmp_target_new_runtime); Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice; @@ -3865,6 +3874,7 @@ // handling code for those requiring so. if ((Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN())) || Opts.OpenCLCPlusPlus) { + Opts.Exceptions = 0; Opts.CXXExceptions = 0; } @@ -3880,6 +3890,20 @@ Opts.OpenMPCUDAReductionBufNum, Diags); } + // Set the value of the debugging flag used in the new offloading device RTL. + // Set either by a specific value or to a default if not specified. + if (Opts.OpenMPIsDevice && (Args.hasArg(OPT_fopenmp_target_debug) || + Args.hasArg(OPT_fopenmp_target_debug_EQ))) { + if (Opts.OpenMPTargetNewRuntime) { + Opts.OpenMPTargetDebug = getLastArgIntValue( + Args, OPT_fopenmp_target_debug_EQ, Opts.OpenMPTargetDebug, Diags); + if (!Opts.OpenMPTargetDebug && Args.hasArg(OPT_fopenmp_target_debug)) + Opts.OpenMPTargetDebug = 1; + } else { + Diags.Report(diag::err_drv_debug_no_new_runtime); + } + } + // Get the OpenMP target triples if any. if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) { enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit }; diff --git a/clang/test/OpenMP/target_debug_codegen.cpp b/clang/test/OpenMP/target_debug_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_debug_codegen.cpp @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +//. +// CHECK: @__omp_rtl_debug_kind = private constant i32 1 +// CHECK: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +// CHECK: @__omp_offloading_fd02_613a0c37__Z3foov_l25_exec_mode = weak constant i8 1 +// CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_613a0c37__Z3foov_l25_exec_mode], section "llvm.metadata" +//. +// CHECK-EQ: @__omp_rtl_debug_kind = private constant i32 111 +// CHECK-EQ: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-EQ: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +// CHECK-EQ: @__omp_offloading_fd02_613a0c37__Z3foov_l25_exec_mode = weak constant i8 1 +// CHECK-EQ: @llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_613a0c37__Z3foov_l25_exec_mode], section "llvm.metadata" +//. +void foo() { +#pragma omp target + { } +} + +#endif +// +// +// +//. +// CHECK: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-EQ: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK: !0 = !{i32 0, i32 64770, i32 1631194167, !"_Z3foov", i32 25, i32 0} +// CHECK: !1 = !{void ()* @__omp_offloading_fd02_613a0c37__Z3foov_l25, !"kernel", i32 1} +// CHECK: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !3 = !{i32 7, !"openmp", i32 50} +// CHECK: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK: !5 = !{!"clang version 14.0.0"} +//. +// CHECK-EQ: !0 = !{i32 0, i32 64770, i32 1631194167, !"_Z3foov", i32 25, i32 0} +// CHECK-EQ: !1 = !{void ()* @__omp_offloading_fd02_613a0c37__Z3foov_l25, !"kernel", i32 1} +// CHECK-EQ: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-EQ: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-EQ: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-EQ: !5 = !{!"clang version 14.0.0"} +//. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -683,6 +683,10 @@ omp::IdentFlag Flags = omp::IdentFlag(0), unsigned Reserve2Flags = 0); + /// Create a global value containing the \p DebugLevel to control debuggin in + /// the module. + GlobalValue *createDebugKind(unsigned DebugLevel); + /// Generate control flow and cleanup for cancellation. /// /// \param CancelFlag Flag indicating if the cancellation is performed. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -244,6 +244,16 @@ assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } +GlobalValue *OpenMPIRBuilder::createDebugKind(unsigned DebugKind) { + IntegerType *I32Ty = Type::getInt32Ty(M.getContext()); + auto *GV = new GlobalVariable( + M, I32Ty, + /* isConstant = */ true, GlobalValue::PrivateLinkage, + ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind"); + + return GV; +} + Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, IdentFlag LocFlags, unsigned Reserve2Flags) {