diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c --- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c +++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify=host -Rpass=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify=all,safe -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out -// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify=all,safe -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out +// RUN: %clang_cc1 -verify=host -Rpass=openmp-opt -Rpass-analysis=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify=all,safe -Rpass=openmp-opt -Rpass-analysis=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out +// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify=all,safe -Rpass=openmp-opt -Rpass-analysis=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out // host-no-diagnostics @@ -96,5 +96,5 @@ } } -// all-remark@* 5 {{OpenMP runtime call __kmpc_global_thread_num moved to}} +// all-remark@* 5 {{OpenMP runtime call __kmpc_global_thread_num moved to beginning of OpenMP region}} // all-remark@* 12 {{OpenMP runtime call 
__kmpc_global_thread_num deduplicated}} diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c --- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c +++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify=host -Rpass=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out -// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out +// RUN: %clang_cc1 -verify=host -Rpass=openmp -Rpass-analysis=openmp-opt -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -Rpass=openmp -Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out +// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify -Rpass=openmp -Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out // host-no-diagnostics @@ -43,5 +43,5 @@ } } -// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to}} +// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to beginning of OpenMP region}} // expected-remark@* 2 {{OpenMP runtime call __kmpc_global_thread_num deduplicated}} diff 
--git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -743,6 +743,11 @@ OptimizationRemarkMissed(const char *PassName, StringRef RemarkName, const Instruction *Inst); + /// Same as above but \p F is used to derive code region and debug + /// location. + OptimizationRemarkMissed(const char *PassName, StringRef RemarkName, + const Function *F); + static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_OptimizationRemarkMissed; } @@ -795,6 +800,11 @@ OptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName, const Instruction *Inst); + /// Same as above but \p F is used to derive code region and debug + /// location. + OptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName, + const Function *F); + static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_OptimizationRemarkAnalysis; } diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp --- a/llvm/lib/IR/DiagnosticInfo.cpp +++ b/llvm/lib/IR/DiagnosticInfo.cpp @@ -291,6 +291,13 @@ *Inst->getParent()->getParent(), Inst->getDebugLoc(), Inst->getParent()) {} +OptimizationRemarkMissed::OptimizationRemarkMissed(const char *PassName, + StringRef RemarkName, + const Function *Func) + : DiagnosticInfoIROptimization( + DK_OptimizationRemarkMissed, DS_Remark, PassName, RemarkName, *Func, + Func->getSubprogram(), getFirstFunctionBlock(Func)) {} + bool OptimizationRemarkMissed::isEnabled() const { const Function &Fn = getFunction(); LLVMContext &Ctx = Fn.getContext(); @@ -319,6 +326,13 @@ *cast<BasicBlock>(CodeRegion)->getParent(), Loc, CodeRegion) {} +OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(const char *PassName, + StringRef RemarkName, + const Function *Func) + : DiagnosticInfoIROptimization( + DK_OptimizationRemarkAnalysis, DS_Remark, PassName, RemarkName, *Func, + Func->getSubprogram(), getFirstFunctionBlock(Func)) {} +
bool OptimizationRemarkAnalysis::isEnabled() const { const Function &Fn = getFunction(); LLVMContext &Ctx = Fn.getContext(); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -581,15 +581,15 @@ for (Function *F : OMPInfoCache.ModuleSlice) { for (auto ICV : ICVs) { auto ICVInfo = OMPInfoCache.ICVs[ICV]; - auto Remark = [&](OptimizationRemark OR) { - return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) - << " Value: " - << (ICVInfo.InitValue - ? ICVInfo.InitValue->getValue().toString(10, true) - : "IMPLEMENTATION_DEFINED"); + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) + << " Value: " + << (ICVInfo.InitValue + ? ICVInfo.InitValue->getValue().toString(10, true) + : "IMPLEMENTATION_DEFINED"); }; - emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); + emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark); } } } @@ -600,12 +600,12 @@ if (!OMPInfoCache.Kernels.count(F)) continue; - auto Remark = [&](OptimizationRemark OR) { - return OR << "OpenMP GPU kernel " - << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "OpenMP GPU kernel " + << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; }; - emitRemarkOnFunction(F, "OpenMPGPU", Remark); + emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark); } } @@ -1419,12 +1419,11 @@ continue; auto Remark = [&](OptimizationRemark OR) { - auto newLoc = &*F.getEntryBlock().getFirstInsertionPt(); return OR << "OpenMP runtime call " - << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to " - << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc()); + << ore::NV("OpenMPOptRuntime", RFI.Name) + << " moved to beginning of OpenMP region"; }; - emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark); + emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeCodeMotion", Remark);
CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); ReplVal = CI; @@ -1457,7 +1456,7 @@ return OR << "OpenMP runtime call " << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; }; - emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark); + emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeDeduplicated", Remark); CGUpdater.removeCallSite(*CI); CI->replaceAllUsesWith(ReplVal); @@ -1558,28 +1557,22 @@ /// /// The remark is built using a callback function provided by the caller that /// takes a RemarkKind as input and returns a RemarkKind. - template <typename RemarkKind, - typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>> - void emitRemark(Instruction *Inst, StringRef RemarkName, + template <typename RemarkKind, typename RemarkCallBack> + void emitRemark(Instruction *I, StringRef RemarkName, RemarkCallBack &&RemarkCB) const { - Function *F = Inst->getParent()->getParent(); + Function *F = I->getParent()->getParent(); auto &ORE = OREGetter(F); - ORE.emit( - [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); + ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); }); } - /// Emit a remark on a function. Since only OptimizationRemark is supporting - /// this, it can't be made generic. - void - emitRemarkOnFunction(Function *F, StringRef RemarkName, - function_ref<OptimizationRemark(OptimizationRemark &&)> - &&RemarkCB) const { + /// Emit a remark on a function. + template <typename RemarkKind, typename RemarkCallBack> + void emitRemark(Function *F, StringRef RemarkName, + RemarkCallBack &&RemarkCB) const { auto &ORE = OREGetter(F); - ORE.emit([&]() { - return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F)); - }); + ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); }); } /// The underlying module.
@@ -1672,10 +1665,11 @@ if (!F.hasLocalLinkage()) { // See https://openmp.llvm.org/remarks/OptimizationRemarks.html - auto Remark = [&](OptimizationRemark OR) { - return OR << "[OMP100] Potentially unknown OpenMP target region caller"; + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA + << "[OMP100] Potentially unknown OpenMP target region caller"; }; - emitRemarkOnFunction(&F, "OMP100", Remark); + emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark); return nullptr; } @@ -1768,15 +1762,16 @@ continue; { - auto Remark = [&](OptimizationRemark OR) { - return OR << "Found a parallel region that is called in a target " - "region but not part of a combined target construct nor " - "nested inside a target construct without intermediate " - "code. This can lead to excessive register usage for " - "unrelated target regions in the same translation unit " - "due to spurious call edges assumed by ptxas."; + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "Found a parallel region that is called in a target " + "region but not part of a combined target construct nor " + "nested inside a target construct without intermediate " + "code. This can lead to excessive register usage for " + "unrelated target regions in the same translation unit " + "due to spurious call edges assumed by ptxas."; }; - emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark); + emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD", + Remark); } // If this ever hits, we should investigate. @@ -1785,12 +1780,13 @@ if (UnknownUse || NumDirectCalls != 1 || ToBeReplacedStateMachineUses.size() != 2) { { - auto Remark = [&](OptimizationRemark OR) { - return OR << "Parallel region is used in " - << (UnknownUse ?
"unknown" : "unexpected") - << " ways; will not attempt to rewrite the state machine."; + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "Parallel region is used in " + << (UnknownUse ? "unknown" : "unexpected") + << " ways; will not attempt to rewrite the state machine."; }; - emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark); + emitRemark<OptimizationRemarkAnalysis>( + F, "OpenMPParallelRegionInNonSPMD", Remark); } continue; } @@ -1800,14 +1796,14 @@ Kernel K = getUniqueKernelFor(*F); if (!K) { { - auto Remark = [&](OptimizationRemark OR) { - return OR << "Parallel region is not known to be called from a " - "unique single target region, maybe the surrounding " - "function has external linkage?; will not attempt to " - "rewrite the state machine use."; + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "Parallel region is not known to be called from a " + "unique single target region, maybe the surrounding " + "function has external linkage?; will not attempt to " + "rewrite the state machine use."; }; - emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl", - Remark); + emitRemark<OptimizationRemarkAnalysis>( + F, "OpenMPParallelRegionInMultipleKernesl", Remark); } continue; } @@ -1818,25 +1814,26 @@ // ensures only direct calls to the function are left. { - auto RemarkParalleRegion = [&](OptimizationRemark OR) { - return OR << "Specialize parallel region that is only reached from a " - "single target region to avoid spurious call edges and " - "excessive register usage in other target regions.
" - "(parallel region ID: " - << ore::NV("OpenMPParallelRegion", F->getName()) - << ", kernel ID: " - << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; + auto RemarkParalleRegion = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "Specialize parallel region that is only reached from a " + "single target region to avoid spurious call edges and " + "excessive register usage in other target regions. " + "(parallel region ID: " + << ore::NV("OpenMPParallelRegion", F->getName()) + << ", kernel ID: " + << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; }; - emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", - RemarkParalleRegion); - auto RemarkKernel = [&](OptimizationRemark OR) { - return OR << "Target region containing the parallel region that is " - "specialized. (parallel region ID: " - << ore::NV("OpenMPParallelRegion", F->getName()) - << ", kernel ID: " - << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; + emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD", + RemarkParalleRegion); + auto RemarkKernel = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "Target region containing the parallel region that is " + "specialized. (parallel region ID: " + << ore::NV("OpenMPParallelRegion", F->getName()) + << ", kernel ID: " + << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; }; - emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel); + emitRemark<OptimizationRemarkAnalysis>(K, "OpenMPParallelRegionInNonSPMD", + RemarkKernel); } Module &M = *F->getParent(); diff --git a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll --- a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll @@ -10,9 +10,9 @@ @0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -; CHECK: remark: deduplication_remarks.c:9:10: OpenMP runtime call __kmpc_global_thread_num moved to deduplication_remarks.c:5:10 -; CHECK: remark: deduplication_remarks.c:7:10: OpenMP runtime call __kmpc_global_thread_num deduplicated -; CHECK: remark: deduplication_remarks.c:5:10: OpenMP runtime call __kmpc_global_thread_num deduplicated +; CHECK: remark: deduplication_remarks.c:4:0: OpenMP runtime call
__kmpc_global_thread_num moved to beginning of OpenMP region +; CHECK: remark: deduplication_remarks.c:4:0: OpenMP runtime call __kmpc_global_thread_num deduplicated +; CHECK: remark: deduplication_remarks.c:4:0: OpenMP runtime call __kmpc_global_thread_num deduplicated define dso_local void @deduplicate() local_unnamed_addr !dbg !14 { %1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !21 call void @useI32(i32 %1), !dbg !23 diff --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll --- a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll +++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes=openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel -; RUN: opt -openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel +; RUN: opt -passes=openmp-opt-cgscc -pass-remarks-analysis=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel +; RUN: opt -openmp-opt-cgscc -pass-remarks-analysis=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel ; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel1 ; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel2 diff --git a/llvm/test/Transforms/OpenMP/icv_remarks.ll b/llvm/test/Transforms/OpenMP/icv_remarks.ll --- a/llvm/test/Transforms/OpenMP/icv_remarks.ll +++ b/llvm/test/Transforms/OpenMP/icv_remarks.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes=openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt -openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes=openmp-opt-cgscc
-pass-remarks-analysis=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -openmp-opt-cgscc -pass-remarks-analysis=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s ; ModuleID = 'icv_remarks.c' source_filename = "icv_remarks.c"