Add optimization-remark support to the Attributor and use it from OpenMPOpt.

Summary of what this patch does:
  * Attributor.h: adds the OptimizationRemarkGetter callback alias, a second
    Attributor constructor taking an OREGetter and a PassName, two
    emitRemark() templates (anchored on an Instruction or on a Function), and
    the OREGetter / PassName members. The pre-existing constructor
    initializes OREGetter to None, in which case emitRemark() returns
    without emitting anything.
  * AttributorAttributes.cpp: emits a "HeapToStack" remark on MallocCall.
  * OpenMPOpt.cpp: emits an "OpenMPReplaceGlobalization" remark reporting the
    allocation size in bytes, and passes OREGetter and DEBUG_TYPE to every
    Attributor it constructs.
  * Tests: add a RUN line with -pass-remarks=openmp-opt and CHECK-REMARKS
    expectations to remove_globalization.ll and replace_globalization.ll.

NOTE(review): this patch was recovered from a copy in which line breaks were
collapsed and every '<...>' template argument list had been stripped.
Indentation and the template arguments (function_ref<...>,
SetVector<Function *>, DenseSet<const char *>, SmallDenseSet<WeakVH, 8>,
Optional<OptimizationRemarkGetter>, template <typename RemarkKind,
typename RemarkCallBack>, emitRemark<OptimizationRemark>) were reconstructed
and should be confirmed against the tree before applying.

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -109,6 +109,7 @@
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/AbstractCallSite.h"
@@ -1073,6 +1074,10 @@
 /// NOTE: The mechanics of adding a new "concrete" abstract attribute are
 ///       described in the file comment.
 struct Attributor {
+
+  using OptimizationRemarkGetter =
+      function_ref<OptimizationRemarkEmitter &(Function *)>;
+
   /// Constructor
   ///
   /// \param Functions The set of functions we are deriving attributes for.
@@ -1088,7 +1093,29 @@
              bool RewriteSignatures = true)
       : Allocator(InfoCache.Allocator), Functions(Functions),
         InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
-        DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures) {}
+        DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
+        OREGetter(None), PassName("") {}
+
+  /// Constructor
+  ///
+  /// \param Functions The set of functions we are deriving attributes for.
+  /// \param InfoCache Cache to hold various information accessible for
+  ///                  the abstract attributes.
+  /// \param CGUpdater Helper to update an underlying call graph.
+  /// \param Allowed If not null, a set limiting the attribute opportunities.
+  /// \param DeleteFns Whether to delete functions
+  /// \param OREGetter A callback function that returns an ORE object from a
+  ///                  Function pointer.
+  /// \param PassName The name of the pass emitting remarks.
+  Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
+             CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed,
+             bool DeleteFns, bool RewriteSignatures,
+             OptimizationRemarkGetter OREGetter, const char *PassName)
+      : Allocator(InfoCache.Allocator), Functions(Functions),
+        InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
+        DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
+        OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)),
+        PassName(PassName) {}
 
   ~Attributor();
 
@@ -1455,6 +1482,41 @@
       const AbstractAttribute &QueryingAA, const Value &V,
       DepClassTy LivenessDepClass = DepClassTy::OPTIONAL);
 
+  /// Emit a remark generically.
+  ///
+  /// This template function can be used to generically emit a remark. The
+  /// RemarkKind should be one of the following:
+  ///   - OptimizationRemark to indicate a successful optimization attempt
+  ///   - OptimizationRemarkMissed to report a failed optimization attempt
+  ///   - OptimizationRemarkAnalysis to provide additional information about an
+  ///     optimization attempt
+  ///
+  /// The remark is built using a callback function \p RemarkCB that takes a
+  /// RemarkKind as input and returns a RemarkKind.
+  template <typename RemarkKind, typename RemarkCallBack>
+  void emitRemark(Instruction *I, StringRef RemarkName,
+                  RemarkCallBack &&RemarkCB) const {
+    if (!OREGetter)
+      return;
+
+    Function *F = I->getFunction();
+    auto &ORE = OREGetter.getValue()(F);
+
+    ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); });
+  }
+
+  /// Emit a remark on a function.
+  template <typename RemarkKind, typename RemarkCallBack>
+  void emitRemark(Function *F, StringRef RemarkName,
+                  RemarkCallBack &&RemarkCB) const {
+    if (!OREGetter)
+      return;
+
+    auto &ORE = OREGetter.getValue()(F);
+
+    ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); });
+  }
+
   /// Helper struct used in the communication between an abstract attribute (AA)
   /// that wants to change the signature of a function and the Attributor which
   /// applies the changes. The struct is partially initialized with the
@@ -1791,6 +1853,12 @@
   SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
   ///}
 
+  /// Callback to get an OptimizationRemarkEmitter from a Function *.
+  Optional<OptimizationRemarkGetter> OREGetter;
+
+  /// The name of the pass to emit remarks for.
+  const char *PassName = "";
+
   friend AADepGraph;
 };
 
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5053,6 +5053,11 @@
       LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
                         << "\n");
 
+      auto Remark = [&](OptimizationRemark OR) {
+        return OR << "Moving memory allocation from the heap to the stack.";
+      };
+      A.emitRemark<OptimizationRemark>(MallocCall, "HeapToStack", Remark);
+
       Align Alignment;
       Value *Size;
       if (isCallocLikeFn(MallocCall, TLI)) {
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2534,6 +2534,15 @@
       auto *NewBuffer =
           ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
 
+      auto Remark = [&](OptimizationRemark OR) {
+        return OR << "Replaced globalized variable with "
+                  << ore::NV("SharedMemory", AllocSize->getZExtValue())
+                  << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
+                  << "of shared memory";
+      };
+      A.emitRemark<OptimizationRemark>(CB, "OpenMPReplaceGlobalization",
+                                       Remark);
+
       SharedMem->setAlignment(MaybeAlign(8));
 
       A.changeValueAfterManifest(*CB, *NewBuffer);
@@ -2688,7 +2697,8 @@
   OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions,
                                 OMPInModule.getKernels());
 
-  Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false);
+  Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, OREGetter,
+               DEBUG_TYPE);
 
   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
   bool Changed = OMPOpt.run(true);
@@ -2743,7 +2753,8 @@
   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
                                 /*CGSCC*/ Functions, OMPInModule.getKernels());
 
-  Attributor A(Functions, InfoCache, CGUpdater, nullptr, false);
+  Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, OREGetter,
+               DEBUG_TYPE);
 
   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
   bool Changed = OMPOpt.run(false);
@@ -2819,7 +2830,8 @@
         *(Functions.back()->getParent()), AG, Allocator,
         /*CGSCC*/ Functions, OMPInModule.getKernels());
 
-    Attributor A(Functions, InfoCache, CGUpdater, nullptr, false);
+    Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
+                 OREGetter, DEBUG_TYPE);
 
     OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
     return OMPOpt.run(false);
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -1,8 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
+; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64"
 
+; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving memory allocation from the heap to the stack.
+
 @S = external local_unnamed_addr global i8*
 
 define void @kernel() {
@@ -26,7 +29,7 @@
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = call i8* @__kmpc_alloc_shared(i64 4)
+  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !8
   call void @use(i8* %0)
   call void @__kmpc_free_shared(i8* %0)
   ret void
@@ -81,3 +84,6 @@
 !3 = !{i32 2, !"Debug Info Version", i32 3}
 !4 = !{i32 1, !"wchar_size", i32 4}
 !5 = !{void ()* @kernel, !"kernel", i32 1}
+!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 2, column: 2, scope: !6)
diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -1,9 +1,12 @@
 ; RUN: opt -S -passes='openmp-opt' < %s | FileCheck %s
+; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64"
 
 @S = external local_unnamed_addr global i8*
 
+; CHECK-REMARKS: remark: replace_globalization.c:5:7: Replaced globalized variable with 16 bytes of shared memory
+; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory
 ; CHECK: [[SHARED_X:@.+]] = internal addrspace(3) global [16 x i8] undef
 ; CHECK: [[SHARED_Y:@.+]] = internal addrspace(3) global [4 x i8] undef
 
@@ -68,7 +71,6 @@
 
 define void @use(i8* %x) {
 entry:
-  %addr = alloca i8*
   store i8* %x, i8** @S
   ret void
 }